def list(self, session=None):
    """Return digest and description of every file in the database.

    See FileCacherBackend.list(). On top of that interface this
    implementation takes one extra, optional, argument.

    session (Session|None): the SQLAlchemy session to query with; if
        not given a temporary one is created through SessionGen and
        used for this call only.

    return ([(digest, description)]): one pair for each FSObject
        returned by the query.

    """
    def _collect(active_session):
        """Build the result, assuming active_session is valid."""
        return [(fso.digest, fso.description)
                for fso in active_session.query(FSObject)]

    if session is None:
        with SessionGen() as session:
            return _collect(session)
    return _collect(session)
def put_file(self, digest, desc=""):
    """See FileCacherBackend.put_file().

    digest: the digest identifying the file.
    desc: the description stored on a newly created FSObject.

    return: the large object obtained with get_lobject(mode='wb') for
        a newly created FSObject, or None if a file with this digest
        is already in the database or an IntegrityError was raised.

    """
    try:
        with SessionGen() as session:
            existing = FSObject.get_from_digest(digest, session)

            # Digests are unique: an existing row means there is
            # nothing to send.
            if existing is not None:
                logger.debug("File %s already stored on database, not "
                             "sending it again." % digest)
                session.rollback()
                return None

            # Not present yet: create the row the caller will fill
            # through the large object.
            new_fso = FSObject(description=desc)
            new_fso.digest = digest
            session.add(new_fso)

            logger.debug("File %s stored on the database." % digest)

            # FIXME There is a remote possibility that someone
            # will try to access this file, believing it has
            # already been stored (since its FSObject exists),
            # while we're still sending its content.
            lobject = new_fso.get_lobject(mode='wb')

            session.commit()

            return lobject

    except IntegrityError:
        logger.warning("File %s caused an IntegrityError, ignoring..."
                       % digest)
        return None
def do_import(self):
    """Fetch the team from the TeamLoader and store it in the DB.

    return (bool): True if the team was committed; False if the
        loader returned no team or the conversion raised
        ImportDataError (in which case nothing is committed).

    """
    loaded_team = self.loader.get_team()
    if loaded_team is None:
        return False

    logger.info("Creating team on the database.")
    with SessionGen() as session:
        try:
            db_team = self._team_to_db(session, loaded_team)
        except ImportDataError as error:
            logger.error(str(error))
            logger.info("Error while importing, no changes were made.")
            return False
        else:
            session.commit()
            # Read the id while the session is still open.
            new_team_id = db_team.id

    logger.info("Import finished (new team id: %s).", new_team_id)
    return True
def assertContestInDb(self, name, description, task_names_and_titles,
                      usernames_and_last_names):
    """Assert that exactly one contest with the given data is in the DB.

    The contest is looked up by name and, to avoid caching, the query
    runs in a brand new session. Every argument is always compared.

    name: the contest name to search for.
    description: the expected contest description.
    task_names_and_titles: the expected (name, title) pairs of the
        contest's tasks, in any order.
    usernames_and_last_names: the expected (username, last name)
        pairs of the users participating in the contest, in any order.

    """
    with SessionGen() as session:
        matching = session.query(Contest) \
            .filter(Contest.name == name).all()
        self.assertEqual(len(matching), 1)
        contest = matching[0]

        self.assertEqual(contest.name, name)
        self.assertEqual(contest.description, description)

        found_tasks = [(task.name, task.title) for task in contest.tasks]
        six.assertCountEqual(self, found_tasks, task_names_and_titles)

        found_users = [(participation.user.username,
                        participation.user.last_name)
                       for participation in contest.participations]
        six.assertCountEqual(self, found_users, usernames_and_last_names)
def __init__(self, contest_ids, export_target,
             dump_files, dump_model, skip_generated,
             skip_submissions, skip_user_tests, skip_users,
             skip_print_jobs):
    """Record what has to be exported and where.

    contest_ids ([int]|None): ids of the contests to export; None
        means every contest in the database, plus every user (unless
        skip_users) and every task not assigned to a contest.
    export_target (str|None): where to write the export; if empty or
        None a name of the form "dump_<today>.tar.gz" is used.
    dump_files, dump_model, skip_generated, skip_submissions,
    skip_user_tests, skip_users, skip_print_jobs: export options,
        stored unchanged in the attributes of the same name.

    """
    if contest_ids is None:
        with SessionGen() as session:
            contests = session.query(Contest).all()
            self.contests_ids = [contest.id for contest in contests]

            if not skip_users:
                users = session.query(User).all()
                self.users_ids = [user.id for user in users]
            else:
                self.users_ids = []

            # Tasks without a contest are exported regardless of
            # whether users are skipped.
            tasks = session.query(Task)\
                .filter(Task.contest_id.is_(None)).all()
            self.tasks_ids = [task.id for task in tasks]
    else:
        # FIXME: this is ATM broken, because if you export a contest, you
        # then export the users who participated in it and then all of the
        # contests those users participated in.
        self.contests_ids = contest_ids
        self.users_ids = []
        self.tasks_ids = []

    self.dump_files = dump_files
    self.dump_model = dump_model
    self.skip_generated = skip_generated
    self.skip_submissions = skip_submissions
    self.skip_user_tests = skip_user_tests
    self.skip_users = skip_users
    self.skip_print_jobs = skip_print_jobs
    self.export_target = export_target

    # If target is not provided, we build a name from today's date.
    if not export_target:
        self.export_target = "dump_%s.tar.gz" % date.today().isoformat()
        logger.warning("export_target not given, using \"%s\"",
                       self.export_target)

    self.file_cacher = FileCacher()
def acquire_worker(self, operations):
    """Try to hand a group of operations to an available worker.

    An inactive, connected worker is looked for; when none is found
    the "workers available" event is cleared and nothing is assigned.

    operations ([ESOperation]): the operations to assign to a worker.

    return (int|None): None if no workers are available, the worker
        assigned to the operations otherwise.

    """
    # Look for an available worker.
    try:
        shard = self.find_worker(WorkerPool.WORKER_INACTIVE,
                                 require_connection=True,
                                 random_worker=True)
    except LookupError:
        self._workers_available_event.clear()
        return None

    # Record what the worker is doing and since when.
    self._add_operations(shard, operations)
    logger.debug("Worker %s acquired.", shard)
    self._start_time[shard] = make_datetime()

    with SessionGen() as session:
        job_group = JobGroup.from_operations(operations, session)
        job_group_dict = job_group.export_to_dict()

    operation_names = ", ".join(
        "`%s'" % operation for operation in operations)
    logger.info("Asking worker %s to %s.", shard, operation_names)

    worker = self._worker[shard]
    worker.execute_job_group(
        job_group_dict=job_group_dict,
        callback=self._service.action_finished,
        plus=shard)
    return shard
def assertTaskInDb(self, task_name, title, contest_id, task_id=None,
                   active_dataset_id=None, dataset_ids=None,
                   dataset_descriptions=None, dataset_task_types=None):
    """Assert that exactly one task with the given data is in the DB.

    The task is looked up by name and, to avoid caching, the query
    runs in a brand new session. Name, title and contest id are always
    compared; from task_id on, parameters are checked only if not None.
    The three dataset_* arguments are compared ignoring order.

    """
    with SessionGen() as session:
        matching = session.query(Task) \
            .filter(Task.name == task_name).all()
        self.assertEqual(len(matching), 1)
        task = matching[0]

        self.assertEqual(task.name, task_name)
        self.assertEqual(task.title, title)
        self.assertEqual(task.contest_id, contest_id)

        if task_id is not None:
            self.assertEqual(task.id, task_id)
        if active_dataset_id is not None:
            self.assertEqual(active_dataset_id, task.active_dataset_id)

        # Each optional expectation is paired with the dataset
        # attribute it is compared against.
        per_dataset_checks = (
            (dataset_ids, "id"),
            (dataset_descriptions, "description"),
            (dataset_task_types, "task_type"),
        )
        for expected, attribute in per_dataset_checks:
            if expected is not None:
                six.assertCountEqual(
                    self, expected,
                    [getattr(dataset, attribute)
                     for dataset in task.datasets])
def remove_submissions(contest_name, task_name, username):
    """
    Delete the submissions of the given user for the given task and
    contest.

    This is meant for the automatic submission system, so the username
    must contain "autotester". Nothing is deleted if any submission
    has no result or no score yet.

    Raise an exception on failure (in particular when the user is not
    an autotester).
    Return True if the submissions were deleted or there were none,
    False if at least one of them has not been scored yet.
    """
    if "autotester" not in username:
        raise Exception("Not removing submissions of user %s, "
                        "they are not an autotester." % username)

    def _is_unscored(submission):
        """Tell whether the submission still lacks a result or score."""
        result = submission.get_result()
        return result is None or result.score is None

    with SessionGen() as session:
        user = get_user(session, username)
        contest = get_contest(session, contest_name)
        participation = get_participation(session, contest, user)
        task = get_task(session, task_name, contest)
        submissions = get_user_task_submissions(session, participation,
                                                task)

        if not submissions:
            return True

        # If any submission is not yet scored, abort.
        if any(_is_unscored(submission) for submission in submissions):
            return False

        # All submissions have been scored. Delete them.
        for scored in submissions:
            session.delete(scored)
        session.commit()

    return True
def add_user(first_name, last_name, username, password, method, is_hashed,
             email, timezone, preferred_languages):
    """Create a user and add it to the database.

    password (str|None): the password; if None a random one is
        generated (is_hashed must then be false) and written to the
        log once the user has been added.
    method: passed, with the password, to build_password (when
        is_hashed is true) or to hash_password (otherwise).
    is_hashed (bool): selects build_password over hash_password.
    preferred_languages (str|None): comma-separated values; blanks
        around each value are stripped and empty values dropped. None
        means no preferred languages.
    first_name, last_name, username, email, timezone: passed unchanged
        to the User constructor.

    return (bool): True if the user was committed, False if the
        commit raised IntegrityError.

    """
    logger.info("Creating the user in the database.")

    pwd_generated = password is None
    if pwd_generated:
        assert not is_hashed
        password = generate_random_password()

    make_stored = build_password if is_hashed else hash_password
    stored_password = make_stored(password, method)

    if preferred_languages is None:
        languages = []
    else:
        languages = [code.strip()
                     for code in preferred_languages.split(",")
                     if code.strip()]

    user = User(first_name=first_name,
                last_name=last_name,
                username=username,
                password=stored_password,
                email=email,
                timezone=timezone,
                preferred_languages=languages)

    try:
        with SessionGen() as session:
            session.add(user)
            session.commit()
    except IntegrityError:
        logger.error("A user with the given username already exists.")
        return False

    # Only a generated password is reported back to the operator.
    password_note = " with password %s" % password if pwd_generated else ""
    logger.info("User added%s. "
                "Use AddParticipation to add this user to a contest."
                % password_note)
    return True
def __init__(self, contest_id, export_target,
             dump_files, dump_model, skip_generated,
             skip_submissions, skip_user_tests):
    """Record which contest has to be exported and where.

    contest_id (int): id of the contest to export; reset to None if
        export_target is empty and no such contest is found.
    export_target (str): where to write the export; if it is the
        empty string, "dump_<contest name>.tar.gz" is used.
    dump_files, dump_model, skip_generated, skip_submissions,
    skip_user_tests: export options, stored unchanged in the
        attributes of the same name.

    """
    self.contest_id = contest_id
    self.export_target = export_target
    self.dump_files = dump_files
    self.dump_model = dump_model
    self.skip_generated = skip_generated
    self.skip_submissions = skip_submissions
    self.skip_user_tests = skip_user_tests

    # If target is not provided, we use the contest's name.
    if export_target == "":
        with SessionGen() as session:
            contest = Contest.get_from_id(self.contest_id, session)
            if contest is not None:
                self.export_target = "dump_%s.tar.gz" % contest.name
                logger.warning("export_target not given, using \"%s\""
                               % self.export_target)
            else:
                logger.critical("Please specify a valid contest id.")
                self.contest_id = None

    self.file_cacher = FileCacher()
def submission_scored(self, submission_id):
    """Notice that a submission has been scored.

    Usually called by ScoringService when it's done with scoring a
    submission result. The submission is looked up in the database
    and, unless its participation is hidden or the submission is not
    official, the operations carrying its score are enqueued for the
    rankings.

    submission_id (int): the id of the submission that changed.

    raise (KeyError): if no submission with that id exists.

    """
    with SessionGen() as session:
        submission = Submission.get_from_id(submission_id, session)

        if submission is None:
            logger.error(
                "[submission_scored] Received score request for "
                "unexistent submission id %s.", submission_id)
            raise KeyError("Submission not found.")
        elif submission.participation.hidden:
            logger.info(
                "[submission_scored] Score for submission %d "
                "not sent because the participation is hidden.",
                submission_id)
            return
        elif not submission.official:
            logger.info(
                "[submission_scored] Score for submission %d "
                "not sent because the submission is not official.",
                submission_id)
            return

        # Update RWS.
        for score_operation in self.operations_for_score(submission):
            self.enqueue(score_operation)
def remove_task(task_name):
    """Delete the task with the given name, after asking confirmation.

    If the task belonged to a contest, the tasks of that contest with
    a greater num are shifted down by one so that nums stay in the
    range 0... n - 1. A message describing the outcome is printed.

    task_name (str): the name of the task to remove.

    """
    with SessionGen() as session:
        doomed = session.query(Task)\
            .filter(Task.name == task_name).first()
        if not doomed:
            print("No task called `%s' found." % task_name)
            return
        if not ask(task_name):
            print("Not removing task `%s'." % task_name)
            return

        # Remember where the task was before it is gone.
        removed_num = doomed.num
        contest_id = doomed.contest_id
        session.delete(doomed)
        session.commit()

        # Keeping the tasks' nums to the range 0... n - 1.
        if contest_id is not None:
            later_tasks = session.query(Task)\
                .filter(Task.contest_id == contest_id)\
                .filter(Task.num > removed_num)\
                .all()
            for later_task in later_tasks:
                later_task.num -= 1
            session.commit()

        print("Task `%s' removed." % task_name)
def precache_files(self, contest_id):
    """RPC to ask the worker to precache the files of a contest.

    Submissions, user tests and print jobs are left out of the
    enumeration. Files that cannot be found are skipped.

    contest_id (int): the id of the contest

    """
    logger.info("Precaching files for contest %d.", contest_id)

    # In order to avoid a long-living connection, first fetch the
    # complete list of files and then download the files; since
    # this is just pre-caching, possible race conditions are not
    # dangerous
    with SessionGen() as session:
        contest = Contest.get_from_id(contest_id, session)
        digests = enumerate_files(session,
                                  contest,
                                  skip_submissions=True,
                                  skip_user_tests=True,
                                  skip_print_jobs=True)

    for digest in digests:
        try:
            self.file_cacher.load(digest, if_needed=True)
        except KeyError:
            # No problem (at this stage) if we cannot find the
            # file
            pass

    logger.info("Precaching finished.")
def precache_files(self, contest_id):
    """RPC to ask the worker to precache the files of a contest.

    The work is done while holding the lock returned by the file
    cacher's precache_lock(); if that returns None another worker is
    already precaching and this call does nothing. Submissions, user
    tests and print jobs are left out of the enumeration, and files
    that cannot be found are skipped.

    contest_id (int): the id of the contest

    """
    precache_lock = self.file_cacher.precache_lock()
    if precache_lock is None:
        # Another worker is already precaching. Hence, this worker doesn't
        # need to do anything.
        logger.info(
            "Another worker is already precaching files for "
            "contest %d.", contest_id)
        return

    with precache_lock:
        logger.info("Precaching files for contest %d.", contest_id)

        # In order to avoid a long-living connection, first fetch the
        # complete list of files and then download the files; since
        # this is just pre-caching, possible race conditions are not
        # dangerous
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            digests = enumerate_files(session,
                                      contest,
                                      skip_submissions=True,
                                      skip_user_tests=True,
                                      skip_print_jobs=True)

        for digest in digests:
            try:
                self.file_cacher.cache_file(digest)
            except KeyError:
                # No problem (at this stage) if we cannot find the
                # file
                pass

        logger.info("Precaching finished.")
def do_import(self):
    """Fetch the user from the UserLoader and store it in the DB.

    Nothing is stored if the loader returns no user or if a user with
    the same username already exists. The method returns None in
    every case.

    """
    new_user = self.loader.get_user()
    if new_user is None:
        return

    logger.info("Creating user on the database.")
    with SessionGen() as session:
        # Check whether the user already exists
        same_name = session.query(User) \
            .filter(User.username == new_user.username)
        if same_name.first() is not None:
            logger.critical("The user already exists.")
            return

        session.add(new_user)
        session.commit()
        # Read the id while the session is still open.
        new_user_id = new_user.id

    logger.info("Import finished (new user id: %s).", new_user_id)
def main():
    """Parse arguments and export the selected submission files.

    One output file is written for each (submission, file) row that
    matches the filters; existing files are never overwritten. Before
    writing anything the user is asked for confirmation.

    return (int): 1 if the arguments are inconsistent or output_dir
        is not a directory, 0 otherwise (including when the user
        declines to continue).

    """
    parser = argparse.ArgumentParser(
        description="Export CMS submissions to a folder.\n",
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-c", "--contest-id", action="store", type=int,
                        help="id of contest (default: all contests)")
    parser.add_argument("-t", "--task-id", action="store", type=int,
                        help="id of task (default: all tasks)")
    parser.add_argument("-u", "--user-id", action="store", type=int,
                        help="id of user (default: all users)")
    parser.add_argument("-s", "--submission-id", action="store", type=int,
                        help="id of submission (default: all submissions)")
    parser.add_argument("--utf8", action="store_true",
                        help="if set, the files will be encoded in utf8"
                             " when possible")
    parser.add_argument("--add-info", action="store_true",
                        help="if set, information on the submission will"
                             " be added in the first lines of each file")
    parser.add_argument("--min-score", action="store", type=float,
                        help="ignore submissions which scored strictly"
                             " less than this (default: 0.0)",
                        default=0.0)
    parser.add_argument("--filename", action="store", type=utf8_decoder,
                        help="the filename format to use\n"
                             "Variables:\n"
                             " id: submission id\n"
                             " file: filename without extension\n"
                             " ext: filename extension\n"
                             " time: submission timestamp\n"
                             " user: username\n"
                             " task: taskname\n"
                             " score: raw score\n"
                             " (default: {id}.{file}{ext})",
                        default="{id}.{file}{ext}")
    parser.add_argument("output_dir", action="store", type=utf8_decoder,
                        help="directory where to save the submissions")

    selection = parser.add_mutually_exclusive_group(required=False)
    selection.add_argument("--unique", action="store_true",
                           help="if set, only the earliest best submission"
                                " will be exported for each (user, task)")
    selection.add_argument("--best", action="store_true",
                           help="if set, only the best submissions will be"
                                " exported for each (user, task)")

    args = parser.parse_args()

    if args.add_info and not args.utf8:
        logger.critical("If --add-info is specified, then --utf8 must be"
                        " specified as well.")
        return 1

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    if not os.path.isdir(output_dir):
        logger.critical("The output-dir parameter must point to a directory")
        return 1

    with SessionGen() as session:
        # One row per (submission, file), restricted to the result on
        # the active dataset of the submission's task.
        query = session.query(Submission)\
            .join(Submission.task)\
            .join(Submission.files)\
            .join(Submission.results)\
            .join(SubmissionResult.dataset)\
            .join(Submission.participation)\
            .join(Participation.user)\
            .filter(Dataset.id == Task.active_dataset_id)\
            .filter(SubmissionResult.score >= args.min_score)\
            .with_entities(Submission.id, Submission.language,
                           Submission.timestamp,
                           SubmissionResult.score,
                           File.filename, File.digest,
                           User.id, User.username, User.first_name,
                           User.last_name,
                           Task.id, Task.name)

        if args.contest_id:
            query = query.filter(
                Participation.contest_id == args.contest_id)
        if args.task_id:
            query = query.filter(Submission.task_id == args.task_id)
        if args.user_id:
            query = query.filter(Participation.user_id == args.user_id)
        if args.submission_id:
            query = query.filter(Submission.id == args.submission_id)

        results = query.all()

        if args.unique or args.best:
            results = filter_top_scoring(results, args.unique)

        print("%s file(s) will be created." % len(results))
        answer = input("Continue? [Y/n] ").strip().lower()
        if answer not in ["y", ""]:
            return 0

        done = 0
        for row in results:
            (s_id, s_language, s_timestamp, sr_score, f_filename,
             f_digest, u_id, u_name, u_fname, u_lname, t_id,
             t_name) = row

            timef = s_timestamp.strftime('%Y%m%dT%H%M%S')

            if s_language:
                ext = languagemanager.get_language(s_language)\
                    .source_extension
            else:
                ext = '.txt'
            filename_base, filename_ext = os.path.splitext(
                f_filename.replace('.%l', ext))

            # "name" is a deprecated specifier with the same meaning as "file"
            target = args.filename.format(id=s_id, file=filename_base,
                                          name=filename_base,
                                          ext=filename_ext,
                                          time=timef, user=u_name,
                                          task=t_name, score=sr_score)
            target = os.path.join(output_dir, target)

            if os.path.exists(target):
                logger.warning("Skipping file '%s' because it already exists",
                               target)
                continue

            target_dir = os.path.dirname(target)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            if not os.path.isdir(target_dir):
                logger.warning("%s is not a directory, skipped.", target_dir)
                continue

            fso = FSObject.get_from_digest(f_digest, session)
            assert fso is not None
            with fso.get_lobject(mode="rb") as file_obj:
                data = file_obj.read()

                if not args.utf8:
                    # Print raw, untouched binary data
                    with open(target, "wb") as f_out:
                        f_out.write(data)
                else:
                    try:
                        data = utf8_decoder(data)
                    except TypeError:
                        logger.warning(
                            "Could not guess encoding of file "
                            "'%s'. Skipping.", target)
                        continue

                    if args.add_info:
                        header = TEMPLATE[ext] % (
                            u_name, u_fname, u_lname, t_name,
                            sr_score, s_timestamp)
                        data = header + data

                    # Print utf8-encoded, possibly altered data
                    with open(target, "wt", encoding="utf-8") as f_out:
                        f_out.write(data)

            done += 1
            print(done, "/", len(results))

    return 0
def add_submission(contest_id, username, task_name, timestamp, files):
    """Store a submission for the given user and task.

    contest_id (int): id of the contest the user and the task must
        belong to.
    username (str): username of the submitting user.
    task_name (str): name of the task.
    timestamp (number): submission time, passed to make_datetime.
    files ({str: str}): maps each filename of the task's submission
        format to the path of the file to store for it.

    return (bool): True if the submission was committed; False if the
        participation or the task cannot be found, a file is not in
        the submission format, a language cannot be recognized for a
        "%l" file, the files mix languages, or storing a file fails.

    """
    file_cacher = FileCacher()
    with SessionGen() as session:

        participation = session.query(Participation)\
            .join(Participation.user)\
            .filter(Participation.contest_id == contest_id)\
            .filter(User.username == username)\
            .first()
        if participation is None:
            logger.critical("User `%s' does not exists or "
                            "does not participate in the contest.",
                            username)
            return False

        task = session.query(Task)\
            .filter(Task.contest_id == contest_id)\
            .filter(Task.name == task_name)\
            .first()
        if task is None:
            logger.critical("Unable to find task `%s'.", task_name)
            return False

        elements = [element.filename for element in task.submission_format]

        for file_ in files:
            if file_ not in elements:
                logger.critical("File `%s' is not in the submission format "
                                "for the task.", file_)
                return False

        if any(element not in files for element in elements):
            logger.warning("Not all files from the submission format were "
                           "provided.")

        # Every given file belongs to the submission format (which may
        # be only partially covered). We compute the language for
        # each file and check that they do not mix.
        language = None
        for file_ in files:
            this_language = filename_to_language(files[file_])
            if this_language is None and "%l" in file_:
                logger.critical("Cannot recognize language for file `%s'.",
                                file_)
                return False

            if language is None:
                language = this_language
            elif this_language is not None and language != this_language:
                logger.critical("Mixed-language submission detected.")
                return False

        # Store all files from the arguments, and obtain their digests..
        file_digests = {}
        try:
            for file_ in files:
                digest = file_cacher.put_file_from_path(
                    files[file_],
                    "Submission file %s sent by %s at %d."
                    % (file_, username, timestamp))
                file_digests[file_] = digest
        except Exception:
            # Any storage failure aborts the submission, but
            # KeyboardInterrupt/SystemExit are left to propagate.
            logger.critical("Error while storing submission's file.",
                            exc_info=True)
            return False

        # Create objects in the DB.
        submission = Submission(make_datetime(timestamp), language,
                                participation=participation, task=task)

        for filename, digest in file_digests.items():
            session.add(File(filename, digest, submission=submission))
        session.add(submission)
        session.commit()

    return True
def initialize(self):
    """Send basic data to all the rankings.

    It's data that's supposed to be sent before the contest, that's
    needed to understand what we're talking about when we send
    submissions: contest, teams, users and tasks. Hidden
    participations are left out, and only the teams of non-hidden
    participations are sent.

    raise (KeyError): if the contest cannot be found.

    """
    logger.info("Initializing rankings.")

    with SessionGen() as session:
        contest = Contest.get_from_id(self.contest_id, session)

        if contest is None:
            logger.error("Received request for unexistent contest "
                         "id %s.", self.contest_id)
            raise KeyError("Contest not found.")

        contest_key = encode_id(contest.name)
        contest_data = {
            "name": contest.description,
            "begin": int(make_timestamp(contest.main_group.start)),
            "end": int(make_timestamp(contest.main_group.stop)),
            "score_precision": contest.score_precision}

        users = {}
        teams = {}

        for participation in contest.participations:
            user = participation.user
            team = participation.team
            if participation.hidden:
                continue

            if team is None:
                team_key = None
            else:
                team_key = encode_id(team.code)
            users[encode_id(user.username)] = {
                "f_name": user.first_name,
                "l_name": user.last_name,
                "team": team_key,
                "unofficial": participation.unofficial,
            }
            if team is not None:
                teams[encode_id(team.code)] = {
                    "name": team.name
                }

        tasks = {}

        for task in contest.tasks:
            score_type = task.active_dataset.score_type_object
            tasks[encode_id(task.name)] = {
                "short_name": task.name,
                "name": task.title,
                "contest": encode_id(contest.name),
                "order": task.num,
                "max_score": score_type.max_score,
                "extra_headers": score_type.ranking_headers,
                "score_precision": task.score_precision,
                "score_mode": task.score_mode,
            }

    payloads = (
        (ProxyExecutor.CONTEST_TYPE, {contest_key: contest_data}),
        (ProxyExecutor.TEAM_TYPE, teams),
        (ProxyExecutor.USER_TYPE, users),
        (ProxyExecutor.TASK_TYPE, tasks),
    )
    for entity_type, payload in payloads:
        self.enqueue(ProxyOperation(entity_type, payload))
def export_submissions(target_dir, contest_names, overwrite=False,
                       make_dir=True):
    """
    Export all submissions from the given contests to the given
    directory. If overwrite is true, existing files are overwritten.
    Otherwise, raise an exception if a file exists.
    If make_dir is true, create all subdirectories needed for the
    following format. Otherwise, assume they exist.

    The files of each submission are put in a directory:
    target_dir/contest_name/task_name/user_name/submission_string/

    Where submission_string includes the date, time, task, user, score.
    For example:
    2018-01-01.10-00.1.task_name.username.score-100
    2018-01-01.10-00.2.task_name.username.compilation-fail

    File contents are copied byte for byte. Raise an exception if a
    contest is not found or a file is missing from the database.
    """

    with SessionGen() as session:
        for contest_name in contest_names:
            # NOTE(review): `unicode` exists only on Python 2 - confirm
            # the interpreter this script is meant to run on.
            contest = session.query(Contest)\
                .filter(Contest.name == unicode(contest_name))\
                .first()
            if contest is None:
                raise Exception("Contest not found: %s" % contest_name)

            logger.info("Querying database for submissions in contest %s...",
                        contest_name)

            # One row per (submission, file), restricted to the result
            # on the active dataset of the submission's task.
            submissions = session.query(Submission)\
                .filter(Participation.contest_id == contest.id)\
                .join(Submission.task)\
                .join(Submission.files)\
                .join(Submission.results)\
                .join(SubmissionResult.dataset)\
                .join(Submission.participation)\
                .join(Participation.user)\
                .filter(Dataset.id == Task.active_dataset_id)\
                .with_entities(Submission.id,
                               Submission.language,
                               Submission.timestamp,
                               SubmissionResult.score,
                               SubmissionResult.compilation_outcome,
                               File.filename,
                               File.digest,
                               User.username,
                               Task.name)\
                .all()

            logger.info("Found %d submissions. Saving...", len(submissions))

            for (index, row) in enumerate(submissions, 1):
                logger.info("Contest %s: saving submission (%d / %d)",
                            contest_name, index, len(submissions))

                # Get submission info and target file path.
                sid, language, timestamp, score, comp_outcome, filename,\
                    digest, username, task_name = row
                file_path = _get_submission_file_path(
                    target_dir, sid, language, timestamp, score,
                    comp_outcome, filename, username, task_name,
                    contest_name)

                # Don't overwrite if not allowed.
                if not overwrite and os.path.exists(file_path):
                    raise Exception("File exists: %s" % file_path)

                # Make directories if necessary.
                if make_dir:
                    dir_path = os.path.dirname(file_path)
                    if not os.path.exists(dir_path):
                        os.makedirs(dir_path)

                # Save the file.
                fso = FSObject.get_from_digest(digest, session)
                if fso is None:
                    raise Exception("File not found in database: %s"
                                    % digest)
                with fso.get_lobject(mode="rb") as file_obj:
                    data = file_obj.read()
                    # The data was read in binary mode, so it must be
                    # written in binary mode too.
                    with open(file_path, "wb") as stream:
                        stream.write(data)
def add_submissions(contest_name, task_name, username, items):
    """
    Add submissions from the given user to the given task
    in the given contest. Each item corresponds to a submission,
    and should contain a dictionary which maps formatted file names
    to paths. For example, in batch tasks the format is "Task.%l",
    so one submission would be {"Task.%l": "path/to/task.cpp"}.

    All items are validated before anything is stored; empty items
    are skipped. Raise an exception if an item contains a file name
    that is not in the task's submission format, or a path that is
    not an existing file.
    """

    # We connect to evaluation service to try and notify it about
    # the new submissions. Otherwise, it will pick it up only on
    # the next sweep for missed operations.
    rs = RemoteServiceClient(ServiceCoord("EvaluationService", 0))
    rs.connect()

    # Whatever happens from here on, the connection must be closed.
    try:
        with SessionGen() as session:
            user = get_user(session, username)
            contest = get_contest(session, contest_name)
            participation = get_participation(session, contest, user)
            task = get_task(session, task_name, contest)
            elements = set(format_element.filename
                           for format_element in task.submission_format)
            file_cacher = FileCacher()

            # We go over all submissions twice. First we validate the
            # submission format.
            for submission_dict in items:
                for (format_file_name, path) in submission_dict.items():
                    if format_file_name not in elements:
                        raise Exception("Unexpected submission file: %s. "
                                        "Expected elements: %s" %
                                        (format_file_name, elements))
                    if not os.path.isfile(path):
                        raise Exception("File not found: %s" % path)

            # Now add to database.
            for submission_dict in items:
                if not submission_dict:
                    continue

                timestamp = time.time()
                file_digests = {}
                language_name = None

                for (format_file_name, path) in submission_dict.items():
                    digest = file_cacher.put_file_from_path(
                        path,
                        "Submission file %s sent by %s at %d."
                        % (path, username, timestamp))
                    file_digests[format_file_name] = digest

                    # The last file with a recognized language decides
                    # the language of the submission.
                    current_language = filename_to_language(path)
                    if current_language is not None:
                        language_name = current_language.name

                submission = Submission(make_datetime(timestamp),
                                        language_name,
                                        participation=participation,
                                        task=task)
                for filename, digest in file_digests.items():
                    session.add(File(filename, digest,
                                     submission=submission))
                session.add(submission)
                session.commit()
                rs.new_submission(submission_id=submission.id)
    finally:
        rs.disconnect()
def do_export(self):
    """Run the actual export code.

    The export is written to a directory. If get_archive_info reports
    a write mode for the target, that directory is a temporary one,
    which is packed into the archive and then removed together with
    its temporary parent, also when the export fails.

    return (bool): True if the export succeeded; False if the target
        already exists or a file could not be retrieved.

    """
    logger.info("Starting export.")

    export_dir = self.export_target
    archive_info = get_archive_info(self.export_target)
    to_archive = archive_info["write_mode"] != ""
    # Parent of the temporary export directory, if one is created.
    temp_root = None

    if to_archive:
        # We are able to write to this archive.
        if os.path.exists(self.export_target):
            logger.critical("The specified file already exists, "
                            "I won't overwrite it.")
            return False
        temp_root = tempfile.mkdtemp()
        export_dir = os.path.join(temp_root, archive_info["basename"])

    try:
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen() as session:
            # Export files.
            logger.info("Exporting files.")
            if self.dump_files:
                for contest_id in self.contests_ids:
                    contest = Contest.get_from_id(contest_id, session)
                    files = enumerate_files(
                        session, contest,
                        skip_submissions=self.skip_submissions,
                        skip_user_tests=self.skip_user_tests,
                        skip_users=self.skip_users,
                        skip_print_jobs=self.skip_print_jobs,
                        skip_generated=self.skip_generated)
                    for file_ in files:
                        if not self.safe_get_file(
                                file_,
                                os.path.join(files_dir, file_),
                                os.path.join(descr_dir, file_)):
                            return False

            # Export data in JSON format.
            if self.dump_model:
                logger.info("Exporting data to a JSON file.")

                # We use strings because they'll be the keys of a JSON
                # object
                self.ids = {}
                self.queue = []

                data = dict()

                for cls, lst in [(Contest, self.contests_ids),
                                 (User, self.users_ids),
                                 (Task, self.tasks_ids)]:
                    for i in lst:
                        obj = cls.get_from_id(i, session)
                        self.get_id(obj)

                # Specify the "root" of the data graph
                data["_objects"] = list(self.ids.values())

                while len(self.queue) > 0:
                    obj = self.queue.pop(0)
                    data[self.ids[obj.sa_identity_key]] = \
                        self.export_object(obj)

                data["_version"] = model_version

                destination = os.path.join(export_dir, "contest.json")
                with open(destination, "wt", encoding="utf-8") as fout:
                    json.dump(data, fout, indent=4, sort_keys=True)

        # If the admin requested export to file, we do that.
        if to_archive:
            with tarfile.open(self.export_target,
                              archive_info["write_mode"]) as archive:
                archive.add(export_dir, arcname=archive_info["basename"])
    finally:
        # mkdtemp leaves deletion to its caller: remove the temporary
        # parent (and the export directory inside it) on success and
        # on failure alike.
        if temp_root is not None:
            rmtree(temp_root)

    logger.info("Export finished.")

    return True
def submissions_status(contest_id):
    """Return statistics on the number of submissions per status.

    The status of a submission is checked on its result for the
    active dataset of its task. The returned dictionary has the keys
    'compiling', 'max_compilations', 'compilation_fail', 'evaluating',
    'max_evaluations', 'scoring', 'scored' and 'total'. The two max_*
    statuses (maximum number of tries reached) should not happen and
    require a check from the admin.

    contest_id (int|None): counts are restricted to this contest,
        or None for no restrictions.

    return (dict): statistics on the submissions.

    """
    # TODO: at the moment this counts all submission results for
    # the live datasets. It is interesting to show also numbers
    # for the datasets with autojudge, and for all datasets.
    max_compilation_tries = \
        EvaluationService.EvaluationService.MAX_COMPILATION_TRIES
    max_evaluation_tries = \
        EvaluationService.EvaluationService.MAX_EVALUATION_TRIES

    with SessionGen() as session:
        base_query = session\
            .query(func.count(SubmissionResult.submission_id))\
            .select_from(SubmissionResult)\
            .join(Dataset)\
            .join(Task, Dataset.task_id == Task.id)\
            .filter(Task.active_dataset_id == SubmissionResult.dataset_id)
        if contest_id is not None:
            base_query = base_query\
                .filter(Task.contest_id == contest_id)

        compiled = base_query.filter(SubmissionResult.filter_compiled())
        evaluated = compiled.filter(SubmissionResult.filter_evaluated())
        not_compiled = base_query.filter(
            not_(SubmissionResult.filter_compiled()))
        not_evaluated = compiled.filter(
            SubmissionResult.filter_compilation_succeeded(),
            not_(SubmissionResult.filter_evaluated()))

        total_query = session\
            .query(func.count(Submission.id))\
            .select_from(Submission)\
            .join(Task, Submission.task_id == Task.id)
        if contest_id is not None:
            total_query = total_query\
                .filter(Task.contest_id == contest_id)

        queries = {}
        queries['compiling'] = not_compiled.filter(
            SubmissionResult.compilation_tries < max_compilation_tries)
        queries['max_compilations'] = not_compiled.filter(
            SubmissionResult.compilation_tries >= max_compilation_tries)
        queries['compilation_fail'] = base_query.filter(
            SubmissionResult.filter_compilation_failed())
        queries['evaluating'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries < max_evaluation_tries)
        queries['max_evaluations'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries >= max_evaluation_tries)
        queries['scoring'] = evaluated.filter(
            not_(SubmissionResult.filter_scored()))
        queries['scored'] = evaluated.filter(
            SubmissionResult.filter_scored())
        queries['total'] = total_query

        # All counts are fetched with a single UNION ALL query.
        # NOTE(review): rows are matched to keys by position, which
        # assumes the database returns them in query order - confirm.
        keys = list(queries.keys())
        first_query = queries[keys[0]]
        other_queries = [queries[key] for key in keys[1:]]
        results = first_query.union_all(*other_queries).all()

    stats = {key: results[index][0] for index, key in enumerate(keys)}
    # 'total' appears once on its own and once more as the sum of the
    # other statuses; whatever is left over is added to 'compiling'.
    stats['compiling'] += 2 * stats['total'] - sum(stats.values())

    return stats
def assertSubmissionCount(self, count):
    """Assert that the DB holds exactly count submissions.

    The submissions are counted in a new session.

    count (int): the expected number of submissions.

    """
    with SessionGen() as session:
        found = session.query(Submission).count()
        self.assertEqual(found, count)
def write_results(self, items):
    """Receive worker results from the cache and write them to the DB.

    Grouping results together by object (i.e., submission result
    or user test result) and type (compilation or evaluation)
    allows this method to talk less to the DB, for example by
    retrieving datasets and submission results only once instead
    of once for every result.

    Groups whose dataset, submission or user test cannot be found in
    the database are logged and skipped in every phase of the method.

    items ([(operation, Result)]): the results received by ES but
        not yet written to the db.

    """
    logger.info("Starting commit process...")

    # Reorganize the results by submission/usertest result and
    # operation type (i.e., group together the testcase
    # evaluations for the same submission and dataset).
    by_object_and_type = defaultdict(list)
    for operation, result in items:
        t = (operation.type_, operation.object_id, operation.dataset_id)
        by_object_and_type[t].append((operation, result))

    with SessionGen() as session:
        # Dictionary holding the objects we use repeatedly,
        # indexed by id, to avoid querying them multiple times.
        # TODO: this pattern is used in WorkerPool and should be
        # abstracted away.
        datasets = dict()
        subs = dict()
        srs = dict()

        # Keys of the groups that were actually written. The later
        # phases iterate over these only, so that a group skipped here
        # cannot cause a KeyError (missing entry in srs) or an
        # AttributeError (dataset cached as None) further down.
        written = []

        for key, operation_results in iteritems(by_object_and_type):
            type_, object_id, dataset_id = key

            # Get dataset.
            if dataset_id not in datasets:
                datasets[dataset_id] = session.query(Dataset)\
                    .filter(Dataset.id == dataset_id)\
                    .options(joinedload(Dataset.testcases))\
                    .first()
            dataset = datasets[dataset_id]
            if dataset is None:
                logger.error("Could not find dataset %d in the database.",
                             dataset_id)
                continue

            # Get submission or user test, and their results.
            if type_ in [ESOperation.COMPILATION, ESOperation.EVALUATION]:
                if object_id not in subs:
                    subs[object_id] = \
                        Submission.get_from_id(object_id, session)
                object_ = subs[object_id]
                if object_ is None:
                    logger.error(
                        "Could not find submission %d "
                        "in the database.", object_id)
                    continue
                result_id = (object_id, dataset_id)
                if result_id not in srs:
                    srs[result_id] = object_.get_result_or_create(dataset)
                object_result = srs[result_id]
            else:
                # We do not cache user tests as they can come up
                # only once.
                object_ = UserTest.get_from_id(object_id, session)
                if object_ is None:
                    logger.error(
                        "Could not find user test %d "
                        "in the database.", object_id)
                    continue
                object_result = object_.get_result_or_create(dataset)

            self.write_results_one_object_and_type(
                session, object_result, operation_results)
            written.append(key)

        logger.info("Committing evaluations...")
        session.commit()

        # Compute the evaluation outcome only once a submission result
        # has one evaluation per testcase of its dataset.
        for type_, object_id, dataset_id in written:
            if type_ == ESOperation.EVALUATION:
                submission_result = srs[(object_id, dataset_id)]
                dataset = datasets[dataset_id]
                if len(submission_result.evaluations) == \
                        len(dataset.testcases):
                    submission_result.set_evaluation_outcome()

        logger.info("Committing evaluation outcomes...")
        session.commit()

        logger.info("Ending operations for %s objects...", len(written))
        for type_, object_id, dataset_id in written:
            if type_ == ESOperation.COMPILATION:
                submission_result = srs[(object_id, dataset_id)]
                self.compilation_ended(submission_result)
            elif type_ == ESOperation.EVALUATION:
                submission_result = srs[(object_id, dataset_id)]
                if submission_result.evaluated():
                    self.evaluation_ended(submission_result)
            elif type_ == ESOperation.USER_TEST_COMPILATION:
                user_test_result = UserTest\
                    .get_from_id(object_id, session)\
                    .get_result(datasets[dataset_id])
                self.user_test_compilation_ended(user_test_result)
            elif type_ == ESOperation.USER_TEST_EVALUATION:
                user_test_result = UserTest\
                    .get_from_id(object_id, session)\
                    .get_result(datasets[dataset_id])
                self.user_test_evaluation_ended(user_test_result)

    logger.info("Done")
def do_import(self):
    """Run the actual import code.

    Unpack the import source when it is a supported archive, optionally
    drop and recreate the database, then import the contest model from
    "contest.json" (when self.load_model is set) and the files found in
    the "files" and "descriptions" directories (when self.load_files
    is set). If an archive was unpacked, it is cleaned up on every
    exit path, failures included.

    return (bool): True if the import succeeded, False otherwise.

    """
    logger.info("Starting import.")

    archive = None
    if Archive.is_supported(self.import_source):
        archive = Archive(self.import_source)
        self.import_dir = archive.unpack()

    try:
        if archive is not None:
            # The archive is expected to contain a single root
            # directory holding the dump.
            file_names = os.listdir(self.import_dir)
            if len(file_names) != 1:
                logger.critical("Cannot find a root directory in %s.",
                                self.import_source)
                return False

            self.import_dir = os.path.join(self.import_dir, file_names[0])

        if self.drop:
            logger.info("Dropping and recreating the database.")
            try:
                if not (drop_db() and init_db()):
                    logger.critical(
                        "Unexpected error while dropping "
                        "and recreating the database.",
                        exc_info=True)
                    return False
            except Exception:
                logger.critical("Unable to access DB.", exc_info=True)
                return False

        with SessionGen() as session:

            # Import the contest in JSON format.
            if self.load_model:
                logger.info("Importing the contest from a JSON file.")

                with io.open(os.path.join(self.import_dir,
                                          "contest.json"), "rb") as fin:
                    # TODO - Throughout all the code we'll assume the
                    # input is correct without actually doing any
                    # validations.  Thus, for example, we're not
                    # checking that the decoded object is a dict...
                    self.datas = json.load(fin)

                # If the dump has been exported using a data model
                # different than the current one (that is, a previous
                # one) we try to update it.
                # If no "_version" field is found we assume it's a v1.0
                # export (before the new dump format was introduced).
                dump_version = self.datas.get("_version", 0)

                if dump_version < model_version:
                    logger.warning(
                        "The dump you're trying to import has been created "
                        "by an old version of CMS (it declares data model "
                        "version %d). It may take a while to adapt it to "
                        "the current data model (which is version %d). You "
                        "can use cmsDumpUpdater to update the on-disk dump "
                        "and speed up future imports.",
                        dump_version, model_version)

                elif dump_version > model_version:
                    logger.critical(
                        "The dump you're trying to import has been created "
                        "by a version of CMS newer than this one (it "
                        "declares data model version %d) and there is no "
                        "way to adapt it to the current data model (which "
                        "is version %d). You probably need to update CMS to "
                        "handle it. It is impossible to proceed with the "
                        "importation.", dump_version, model_version)
                    return False

                else:
                    logger.info(
                        "Importing dump with data model version %d.",
                        dump_version)

                for version in range(dump_version, model_version):
                    # Update from version to version+1
                    updater = __import__(
                        "cmscontrib.updaters.update_%d" % (version + 1),
                        globals(), locals(), ["Updater"]).Updater(self.datas)
                    self.datas = updater.run()
                    self.datas["_version"] = version + 1

                assert self.datas["_version"] == model_version

                # Keys starting with "_" are metadata, not objects.
                self.objs = dict()
                for id_, data in iteritems(self.datas):
                    if not id_.startswith("_"):
                        self.objs[id_] = self.import_object(data)
                for id_, data in iteritems(self.datas):
                    if not id_.startswith("_"):
                        self.add_relationships(data, self.objs[id_])

                # Iterate over a copy, as entries are deleted on the way.
                for k, v in list(iteritems(self.objs)):

                    # Skip submissions if requested
                    if self.skip_submissions and isinstance(v, Submission):
                        del self.objs[k]

                    # Skip user_tests if requested
                    if self.skip_user_tests and isinstance(v, UserTest):
                        del self.objs[k]

                    # Skip print jobs if requested
                    if self.skip_print_jobs and isinstance(v, PrintJob):
                        del self.objs[k]

                    # Skip generated data if requested
                    if self.skip_generated and \
                            isinstance(v, (SubmissionResult, UserTestResult)):
                        del self.objs[k]

                contest_id = list()
                contest_files = set()

                # We add explicitly only the top-level objects:
                # contests, and tasks and users not contained in any
                # contest. This will add on cascade all dependent
                # objects, and not add orphaned objects (like those
                # that depended on submissions or user tests that we
                # might have removed above).
                for id_ in self.datas["_objects"]:
                    obj = self.objs[id_]
                    session.add(obj)
                    session.flush()

                    if isinstance(obj, Contest):
                        contest_id += [obj.id]
                        contest_files |= enumerate_files(
                            session, obj,
                            skip_submissions=self.skip_submissions,
                            skip_user_tests=self.skip_user_tests,
                            skip_print_jobs=self.skip_print_jobs,
                            skip_generated=self.skip_generated)

                session.commit()
            else:
                contest_id = None
                contest_files = None

            # Import files.
            if self.load_files:
                logger.info("Importing files.")

                files_dir = os.path.join(self.import_dir, "files")
                descr_dir = os.path.join(self.import_dir, "descriptions")

                files = set(os.listdir(files_dir))
                descr = set(os.listdir(descr_dir))

                # descr is not a subset of files: a description exists
                # without the corresponding file.
                if not descr <= files:
                    logger.warning("Some descriptions do not have an "
                                   "associated file.")
                # files is not a subset of descr: a file exists without
                # the corresponding description.
                if not files <= descr:
                    logger.warning("Some files do not have an associated "
                                   "description.")
                if not (contest_files is None or files <= contest_files):
                    # FIXME Check if it's because this is a light import
                    # or because we're skipping submissions or user_tests
                    logger.warning("The dump contains some files that are "
                                   "not needed by the contest.")
                if not (contest_files is None or contest_files <= files):
                    # The reason for this could be that it was a light
                    # export that's not being reimported as such.
                    logger.warning("The contest needs some files that are "
                                   "not contained in the dump.")

                # Limit import to files we actually need.
                if contest_files is not None:
                    files &= contest_files

                for digest in files:
                    file_ = os.path.join(files_dir, digest)
                    desc = os.path.join(descr_dir, digest)
                    if not self.safe_put_file(file_, desc):
                        logger.critical(
                            "Unable to put file `%s' in the DB. "
                            "Aborting. Please remove the contest "
                            "from the database.", file_)
                        # TODO: remove contest from the database.
                        return False

        if contest_id is not None:
            logger.info("Import finished (contest id: %s).",
                        ", ".join("%d" % id_ for id_ in contest_id))
        else:
            logger.info("Import finished.")

        return True

    finally:
        # Clean up, if an archive was used, whatever the outcome.
        if archive is not None:
            archive.cleanup()
def execute(self, entry):
    """Print a print job.

    This is the core of PrintingService. The file of the print job is
    fetched into a temporary directory, converted to PDF when needed
    (through a2ps and ps2pdf), prefixed with a title page built with
    pdflatex and finally sent to the printer through CUPS. The
    temporary directory is removed on every exit path, including when
    one of the steps raises.

    entry (QueueEntry): the entry containing the operation to
        perform.

    raise (ValueError): if the print job is not in the database.
    raise (Exception): if one of the external commands exits with a
        non-zero status.

    """
    # TODO: automatically re-enqueue in case of a recoverable
    # error.
    printjob_id = entry.item.printjob_id
    with SessionGen() as session:
        # Obtain print job.
        printjob = PrintJob.get_from_id(printjob_id, session)
        if printjob is None:
            raise ValueError("Print job %d not found in the database." %
                             printjob_id)
        user = printjob.participation.user
        contest = printjob.participation.contest
        timezone = get_timezone(user, contest)
        timestr = format_datetime(printjob.timestamp, timezone)
        filename = printjob.filename

        # Check if it's ready to be printed.
        # NOTE(review): this branch only logs; the job is still
        # processed and sent to the printer again below. Confirm
        # whether an early return was intended.
        if printjob.done:
            logger.info("Print job %d was already sent to the printer.",
                        printjob_id)

        directory = tempfile.mkdtemp(dir=config.temp_dir)
        try:
            logger.info("Preparing print job in directory %s", directory)

            # Take the base name just to be sure.
            relname = "source_" + os.path.basename(filename)
            source = os.path.join(directory, relname)
            with open(source, "wb") as file_:
                self.file_cacher.get_file_to_fobj(printjob.digest, file_)

            if filename.endswith(".pdf") and config.pdf_printing_allowed:
                source_pdf = source
            else:
                # Convert text to ps.
                source_ps = os.path.join(directory, "source.ps")
                cmd = ["a2ps",
                       source,
                       "--delegate=no",
                       "--output=" + source_ps,
                       "--medium=%s" % config.paper_size.capitalize(),
                       "--portrait",
                       "--columns=1",
                       "--rows=1",
                       "--pages=1-%d" % (config.max_pages_per_job),
                       "--header=",
                       "--footer=",
                       "--left-footer=",
                       "--right-footer=",
                       "--center-title=" + filename,
                       "--left-title=" + timestr]
                ret = subprocess.call(cmd, cwd=directory)
                if ret != 0:
                    raise Exception(
                        "Failed to convert text file to ps with command: %s"
                        "(error %d)" % (pretty_print_cmdline(cmd), ret))

                if not os.path.exists(source_ps):
                    logger.warning("Unable to convert from text to ps.")
                    printjob.done = True
                    printjob.status = json.dumps([
                        N_("Invalid file")])
                    session.commit()
                    return

                # Convert ps to pdf
                source_pdf = os.path.join(directory, "source.pdf")
                cmd = ["ps2pdf",
                       "-sPAPERSIZE=%s" % config.paper_size.lower(),
                       source_ps]
                ret = subprocess.call(cmd, cwd=directory)
                if ret != 0:
                    raise Exception(
                        "Failed to convert ps file to pdf with command: %s"
                        "(error %d)" % (pretty_print_cmdline(cmd), ret))

            # Find out number of pages
            with open(source_pdf, "rb") as file_:
                pdfreader = PdfFileReader(file_)
                page_count = pdfreader.getNumPages()

            logger.info("Preparing %d page(s) (plus the title page)",
                        page_count)

            if page_count > config.max_pages_per_job:
                logger.info("Too many pages.")
                printjob.done = True
                printjob.status = json.dumps([
                    N_("Print job has too many pages")])
                session.commit()
                return

            # Add the title page
            title_tex = os.path.join(directory, "title_page.tex")
            title_pdf = os.path.join(directory, "title_page.pdf")
            with open(title_tex, "w") as f:
                f.write(self.template_loader.load("title_page.tex")
                        .generate(user=user, filename=filename,
                                  timestr=timestr,
                                  page_count=page_count,
                                  paper_size=config.paper_size))
            cmd = ["pdflatex",
                   "-interaction",
                   "nonstopmode",
                   title_tex]
            ret = subprocess.call(cmd, cwd=directory)
            if ret != 0:
                raise Exception(
                    "Failed to create title page with command: %s"
                    "(error %d)" % (pretty_print_cmdline(cmd), ret))

            # Title page first, then the document itself.
            pdfmerger = PdfFileMerger()
            with open(title_pdf, "rb") as file_:
                pdfmerger.append(file_)
            with open(source_pdf, "rb") as file_:
                pdfmerger.append(file_)
            result = os.path.join(directory, "document.pdf")
            with open(result, "wb") as file_:
                pdfmerger.write(file_)

            try:
                printer_connection = cups.Connection()
                printer_connection.printFile(
                    config.printer, result,
                    "Printout %d" % printjob_id, {})
            except cups.IPPError as error:
                # The job is left not done, so it is not marked as
                # printed when CUPS refused it.
                logger.error("Unable to print: `%s'.", error)
            else:
                printjob.done = True
                printjob.status = json.dumps([N_("Sent to printer")])
                session.commit()
        finally:
            # Single cleanup point for early returns, failures and
            # the normal completion alike.
            rmtree(directory)
def execute(self, entry):
    """Assign a score to a submission result.

    This is the core of ScoringService: the submission, the dataset
    and the submission result named by the operation are loaded from
    the database; the result is checked to be in a state that allows
    scoring; the score type of the dataset computes the score, which
    is stored back in the database; finally, if the dataset is the
    active one of the task, ProxyService is notified.

    entry (QueueEntry): entry containing the operation to perform.

    raise (ValueError): if the submission, the dataset or the
        submission result cannot be found, or if the submission result
        neither needs scoring nor is already scored.

    """
    operation = entry.item
    submission_id = operation.submission_id
    dataset_id = operation.dataset_id

    with SessionGen() as session:
        # Load the objects the operation refers to.
        submission = Submission.get_from_id(submission_id, session)
        if submission is None:
            raise ValueError("Submission %d not found in the database." %
                             submission_id)

        dataset = Dataset.get_from_id(dataset_id, session)
        if dataset is None:
            raise ValueError("Dataset %d not found in the database." %
                             dataset_id)

        submission_result = submission.get_result(dataset)
        # A missing result means it was not even compiled (for some
        # reason).
        if submission_result is None:
            raise ValueError(
                "Submission result %d(%d) was not found." %
                (submission_id, dataset_id))

        # Nothing to do if already scored; any other state that does
        # not need scoring is an error.
        if not submission_result.needs_scoring():
            if not submission_result.scored():
                raise ValueError(
                    "The state of the submission result "
                    "%d(%d) doesn't allow scoring." %
                    (submission_id, dataset_id))
            logger.info("Submission result %d(%d) is already scored.",
                        submission_id, dataset_id)
            return

        # Compute the score with the score type of the dataset.
        score_type = get_score_type(dataset=dataset)
        (score,
         score_details,
         public_score,
         public_score_details,
         ranking_score_details) = \
            score_type.compute_score(submission_result)

        # Fill the result in and store it.
        submission_result.score = score
        submission_result.score_details = score_details
        submission_result.public_score = public_score
        submission_result.public_score_details = public_score_details
        submission_result.ranking_score_details = \
            json.dumps(ranking_score_details)
        session.commit()

        # Only scores on the active dataset are forwarded to
        # ProxyService.
        if dataset is submission.task.active_dataset:
            elapsed = (make_datetime() - submission.timestamp)\
                .total_seconds()
            logger.info("Submission scored %.1f seconds after submission",
                        elapsed)
            self.proxy_service.submission_scored(
                submission_id=submission.id)
def wsgi_app(self, environ, start_response):
    """Route a request to its handler and build the JSON response.

    Requests for the 'dbfile' endpoint are served by dbfile_handler.
    Every other request is expected to carry a JSON body; its decoded
    content is made available as local.data, and the handler named
    "<target>_handler" is invoked with a database session stored in
    local.session. A handler returning None yields local.resp with
    'success' set to 1; any other return value is reported as the
    'error' of an unsuccessful response.

    environ (dict): the WSGI environment of the request.
    start_response (callable): the WSGI start_response callable (not
        used directly here).

    return: a response object, or an HTTP exception object when the
        request cannot be served.

    """
    route = self.router.bind_to_environ(environ)
    try:
        endpoint, args = route.match()
    except HTTPException:
        return NotFound()

    try:
        if endpoint == 'dbfile':
            return self.dbfile_handler(environ, args)
    except HTTPException as e:
        return e

    request = Request(environ)
    if request.mimetype != 'application/json':
        logger.warning('Request not in JSON')
        data = dict()
    else:
        try:
            data = json.load(request.stream)
        except (ValueError, TypeError):
            logger.warning('JSON parse error')
            data = dict()

    if 'first' in data and 'last' in data:
        # The bounds come from the client: values that are not
        # integers are a bad request, not a server error.
        try:
            data['first'] = int(data['first'])
            data['last'] = int(data['last'])
        except (ValueError, TypeError):
            logger.warning('Invalid range in request')
            return BadRequest()
        if data['first'] < 0 or data['first'] > data['last']:
            return BadRequest()

    with SessionGen() as local.session:
        try:
            username = data['username']
            token = data['token']
            local.participation = self.get_participation(username, token)
            local.user = local.participation.user
        except (BadRequest, KeyError):
            # Missing or rejected credentials: proceed as unlogged.
            local.user = None
        if local.user is None:
            local.access_level = 7  # Access level of unlogged user
        else:
            local.access_level = local.user.social_user.access_level

        try:
            local.data = data
            local.resp = dict()
            ans = getattr(self, args['target'] + '_handler')()
            local.resp['success'] = 1
        except AttributeError:
            # NOTE(review): this also catches an AttributeError raised
            # inside the handler itself, not only a missing handler.
            logger.error('Endpoint %s not implemented yet!', endpoint)
            logger.error(traceback.format_exc())
            return NotFound()
        except KeyError:
            logger.error(traceback.format_exc())
            return BadRequest()

    response = Response()
    response.mimetype = 'application/json'
    response.status_code = 200
    if ans is None:
        response.data = json.dumps(local.resp)
    else:
        response.data = json.dumps({'success': 0, 'error': ans})
    return response
def invalidate_submission(self,
                          contest_id=None,
                          submission_id=None,
                          dataset_id=None,
                          participation_id=None,
                          task_id=None,
                          level="compilation"):
    """Request to invalidate some computed data.

    Invalidate the compilation and/or evaluation data of the
    SubmissionResults that:
    - belong to submission_id or, if None, to any submission of
      participation_id and/or task_id or, if both None, to any
      submission of the contest asked for, or, if all three are
      None, the contest this service is running for (or all
      contests).
    - belong to dataset_id or, if None, to any dataset of task_id
      or, if None, to any dataset of any task of the contest this
      service is running for.

    The data is cleared, the operations involving the submissions
    currently enqueued are deleted, and the ones already assigned to
    the workers are ignored. New appropriate operations are
    enqueued.

    contest_id (int|None): id of the contest to invalidate, or None
        to use the contest_id of this service.
    submission_id (int|None): id of the submission to invalidate,
        or None.
    dataset_id (int|None): id of the dataset to invalidate, or
        None.
    participation_id (int|None): id of the participation to
        invalidate, or None.
    task_id (int|None): id of the task to invalidate, or None.
    level (string): 'compilation' or 'evaluation'

    raise (ValueError): if level is not one of the two allowed values.

    """
    logger.info("Invalidation request received.")

    # Validate arguments
    # TODO Check that all these objects belong to this contest.
    if level not in ("compilation", "evaluation"):
        raise ValueError(
            "Unexpected invalidation level `%s'." % level)

    if contest_id is None:
        contest_id = self.contest_id

    with SessionGen() as session:
        # When invalidating a dataset we need to know the task_id,
        # otherwise get_submissions will return all the submissions of
        # the contest.
        only_dataset_given = dataset_id is not None \
            and task_id is None and submission_id is None
        if only_dataset_given:
            task_id = Dataset.get_from_id(dataset_id, session).task_id

        # The contest is used as a filter only when no narrower filter
        # is available.
        no_submission_filter = participation_id is None \
            and task_id is None and submission_id is None

        # First we load all involved submissions.
        submissions = get_submissions(
            contest_id if no_submission_filter else None,
            participation_id, task_id, submission_id, session)

        # Then we get all relevant operations, and we remove them
        # both from the queue and from the pool (i.e., we ignore
        # the workers involved in those operations).
        for operation in get_relevant_operations(
                level, submissions, dataset_id):
            try:
                self.dequeue(operation)
            except KeyError:
                pass  # Ok, the operation wasn't in the queue.
            try:
                self.get_executor().pool.ignore_operation(operation)
            except LookupError:
                pass  # Ok, the operation wasn't in the pool.

        # Then we find all existing results in the database, and
        # we remove them.
        no_result_filter = no_submission_filter and dataset_id is None
        submission_results = get_submission_results(
            contest_id if no_result_filter else None,
            participation_id,
            # Provide the task_id only if the entire task has to be
            # reevaluated and not only a specific dataset.
            task_id if dataset_id is None else None,
            submission_id, dataset_id, session)

        logger.info("Submission results to invalidate %s for: %d.",
                    level, len(submission_results))
        for submission_result in submission_results:
            # We invalidate the appropriate data and queue the
            # operations to recompute those data. The level was
            # validated above, so it is one of the two values.
            if level == "compilation":
                submission_result.invalidate_compilation()
            else:
                submission_result.invalidate_evaluation()

        # Finally, we re-enqueue the operations for the
        # submissions.
        for submission in submissions:
            self.submission_enqueue_operations(submission)

        session.commit()

    logger.info("Invalidate successfully completed.")
def acquire_worker(self, operations):
    """Try to assign a group of operations to an available worker.

    An inactive and connected worker is looked for; if none is found
    the "workers available" event is cleared and None is returned.
    Otherwise the operations are recorded for the worker, a job is
    built for each operation that does not carry one already, and the
    whole group is sent to the worker.

    operations ([ESOperation]): the operations to assign to a worker.

    return (int|None): None if no workers are available, the worker
        assigned to the operations otherwise.

    """
    # We look for an available worker.
    try:
        shard = self.find_worker(WorkerPool.WORKER_INACTIVE,
                                 require_connection=True,
                                 random_worker=True)
    except LookupError:
        self._workers_available_event.clear()
        return None

    # Then we fill the info for future memory.
    self._add_operations(shard, operations)
    logger.debug("Worker %s acquired.", shard)
    self._start_time[shard] = make_datetime()

    # A database session is needed only if some job has to be built.
    needs_session = any(op.job is None for op in operations)

    if not needs_session:
        jobs = [op.job for op in operations]
        job_group_dict = JobGroup(jobs).export_to_dict()
    else:
        with SessionGen() as session:
            # Objects already loaded in this call, indexed by id.
            dataset_cache = {}
            submission_cache = {}
            user_test_cache = {}

            jobs = []
            for op in operations:
                logger.info("Asking worker %s to `%s'.", shard, op)

                if op.job is not None:
                    # NOTE(review): operations that already carry a
                    # job are logged a second time here, while nothing
                    # is logged when no session is needed; confirm
                    # which behavior is the intended one.
                    logger.info("Asking worker %s to `%s'.", shard, op)
                    jobs.append(op.job)
                    continue

                if op.dataset_id not in dataset_cache:
                    dataset_cache[op.dataset_id] = \
                        Dataset.get_from_id(op.dataset_id, session)

                if op.for_submission():
                    if op.object_id not in submission_cache:
                        submission_cache[op.object_id] = \
                            Submission.get_from_id(op.object_id, session)
                    target = submission_cache[op.object_id]
                else:
                    if op.object_id not in user_test_cache:
                        user_test_cache[op.object_id] = \
                            UserTest.get_from_id(op.object_id, session)
                    target = user_test_cache[op.object_id]

                jobs.append(Job.from_operation(
                    op, target, dataset_cache[op.dataset_id]))

            job_group_dict = JobGroup(jobs).export_to_dict()

    self._worker[shard].execute_job_group(
        job_group_dict=job_group_dict,
        callback=self._service.action_finished,
        plus=shard)
    return shard