def file_length(digest, file_cacher=None, file_lengther=None):
    """Compute the length of the file identified by digest.

    A fresh "lengther" object is handed to the cacher as the
    destination file object, and the position it reports afterwards
    is returned.

    digest (string): the digest of the file.
    file_cacher (FileCacher): the cacher to use; when None a new
        FileCacher is built.
    file_lengther (class): a class, callable without arguments, that
        builds the File-like object measuring the input; when None,
        FileLengther is used.

    return (int): the length of the file, as reported by the
        lengther's tell().

    """
    cacher = FileCacher() if file_cacher is None else file_cacher
    lengther_class = \
        FileLengther if file_lengther is None else file_lengther

    measurer = lengther_class()
    cacher.get_file(digest, file_obj=measurer)
    return measurer.tell()
def file_length(digest, file_cacher=None, file_lengther=None):
    """Compute the length of the file identified by digest.

    The file is requested from the cacher with a newly built
    "lengther" as destination file object; the value of the
    lengther's tell() after that call is the result.

    digest (string): the digest of the file.
    file_cacher (FileCacher): the cacher to use, or None to have a
        new FileCacher created.
    file_lengther (class): a class, instantiated with no arguments,
        producing a File-like object that tells the dimension of
        the input; None selects FileLengther.

    return (int): the length of the file.

    """
    cacher = file_cacher if file_cacher is not None else FileCacher()
    make_lengther = \
        file_lengther if file_lengther is not None else FileLengther

    sink = make_lengther()
    cacher.get_file(digest, file_obj=sink)
    return sink.tell()
class SpoolExporter:
    """This service creates a tree structure "similar" to the one used
    in Italian IOI repository for storing the results of a contest.

    """

    def __init__(self, contest_id, spool_dir):
        """Remember what to export and where to put it.

        contest_id: id of the contest to export.
        spool_dir (string): directory that will be created and filled;
            source files go in its "upload" subdirectory.

        """
        self.contest_id = contest_id
        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(self.spool_dir, "upload")
        self.contest = None

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        return (bool): False if the spool directory could not be
            created, True once everything has been exported.

        """
        logger.operation = "exporting contest %s" % self.contest_id
        logger.info("Starting export.")

        logger.info("Creating dir structure.")
        try:
            os.mkdir(self.spool_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False
        os.mkdir(self.upload_dir)

        with SessionGen(commit=False) as session:
            self.contest = Contest.get_from_id(self.contest_id, session)

            # Creating users' directory.
            for user in self.contest.users:
                if not user.hidden:
                    os.mkdir(os.path.join(self.upload_dir, user.username))

            # Both exports read attributes of self.contest, so they
            # run while the session is still open.
            self.export_submissions()
            self.export_ranking()

        logger.info("Export finished.")
        logger.operation = ""

        return True

    @staticmethod
    def _write_lines(path, lines):
        """Write each of lines, newline-terminated, to path as UTF-8.

        The file is closed even if writing fails half way.

        """
        with codecs.open(path, "w", encoding="utf-8") as output:
            for line in lines:
                output.write(line + "\n")

    def _export_results(self, submission):
        """Write the two .res files of an evaluated submission.

        One file has the timestamp in its name, the other has not
        (and is therefore overwritten by later submissions of the
        same user for the same task and language).

        """
        username = submission.user.username
        task = submission.task.name
        language = submission.language

        lines = []
        total = 0.0
        for num, evaluation in enumerate(submission.evaluations):
            outcome = float(evaluation.outcome)
            total += outcome
            lines.append("Executing on file n. %2d %s (%.4f)" %
                         (num, evaluation.text, outcome))
        lines.append("Score: %.6f" % total)

        self._write_lines(
            os.path.join(self.spool_dir, "%d.%s.%s.%s.res" %
                         (submission.timestamp, username, task, language)),
            lines)
        self._write_lines(
            os.path.join(self.spool_dir, "%s.%s.%s.res" %
                         (username, task, language)),
            lines)

    def export_submissions(self):
        """Export submissions' source files.

        Submissions of hidden users are skipped. For each other
        submission the source is saved twice under the user's upload
        directory (with and without the timestamp in the name), a line
        is appended to the "queue" file and, if the submission has
        been evaluated, its result files are written.

        """
        logger.info("Exporting submissions.")

        # Oldest first: the copies without a timestamp in their name
        # are overwritten at each step, so this ordering makes them
        # end up holding the most recent submission.
        submissions = sorted(
            (submission for submission in self.contest.get_submissions()
             if not submission.user.hidden),
            key=lambda submission: submission.timestamp)

        queue_path = os.path.join(self.spool_dir, "queue")
        with codecs.open(queue_path, "w", encoding="utf-8") as queue_file:
            for submission in submissions:
                logger.info("Exporting submission %s." % submission.id)
                username = submission.user.username
                task = submission.task.name
                timestamp = submission.timestamp
                language = submission.language

                # Get source files to the spool directory.
                file_digest = \
                    submission.files["%s.%s" % (task, "%l")].digest
                user_dir = os.path.join(self.upload_dir, username)
                self.file_cacher.get_file(
                    file_digest,
                    path=os.path.join(
                        user_dir, "%s.%d.%s" % (task, timestamp, language)))
                self.file_cacher.get_file(
                    file_digest,
                    path=os.path.join(user_dir, "%s.%s" % (task, language)))
                queue_file.write("./upload/%s/%s.%d.%s\n" %
                                 (username, task, timestamp, language))

                # Write results file for the submission.
                if submission.evaluated():
                    self._export_results(submission)

            # The queue ends with an empty line.
            queue_file.write("\n")

    def export_ranking(self):
        """Exports the ranking in csv and txt (human-readable) form.

        Hidden users are left out. Rows are sorted in decreasing
        order of (total, username, per-task points).

        """
        logger.info("Exporting ranking.")

        tasks = self.contest.tasks
        n_tasks = len(tasks)

        # Create a list of [points, username, [task_points]] to write
        # to the rankings.
        users = {}
        hidden_users = set()
        for user in self.contest.users:
            # Avoid hidden users.
            if user.hidden:
                hidden_users.add(user.username)
            else:
                users[user.username] = \
                    [0.0, user.username, [0.0] * n_tasks]

        for (username, task_num), score in \
                self.contest.ranking_view.scores.iteritems():
            if username not in hidden_users:
                users[username][0] += score.score
                users[username][2][task_num] = score.score
        ranking = sorted(users.values(), reverse=True)

        # Rankings' header.
        task_names = tuple(task.name for task in tasks)
        txt_lines = [
            "Classifica finale del contest `%s'" % self.contest.description,
            ("%20s %10s" % ("Utente", "Totale")) +
            ((" %10s" * n_tasks) % task_names),
        ]
        csv_lines = [
            ("%s,%s" % ("utente", "totale")) +
            ((",%s" * n_tasks) % task_names),
        ]

        # Rankings' content.
        points_line = " %10.3f" * n_tasks
        csv_points_line = ",%.6f" * n_tasks
        for total, user, problems in ranking:
            txt_lines.append(("%20s %10.3f" % (user, total)) +
                             (points_line % tuple(problems)))
            csv_lines.append(("%s,%.6f" % (user, total)) +
                             (csv_points_line % tuple(problems)))

        self._write_lines(
            os.path.join(self.spool_dir, "classifica.txt"), txt_lines)
        self._write_lines(
            os.path.join(self.spool_dir, "classifica.csv"), csv_lines)
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        """Set up logging, the base Service and the worker's state.

        shard: the shard of this worker, forwarded to ServiceCoord
            and to Service.__init__.

        """
        logger.initialize(ServiceCoord("Worker", shard))
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # The task type of the job being executed, None when idle.
        self.task_type = None
        # Held for the whole duration of a job: one job at a time.
        self.work_lock = threading.Lock()
        self.session = None

    @rpc_method
    def ignore_job(self):
        """RPC that inform the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the result are
        inconsistent.

        """
        # We inform the task_type to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        try:
            self.task_type.ignore_job = True
        except AttributeError:
            pass  # Job concluded right under our nose, that's ok too.

    # NOTE(review): a FIXME used to say here that rpc_threaded was
    # disabled because it made the call fail; the decorator is
    # applied below, so that remark looks stale - please confirm.
    @rpc_method
    @rpc_threaded
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        Submissions' and user tests' files are skipped.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True,
                                                  skip_user_tests=True):
                self.file_cacher.get_file(digest)
        logger.info("Precaching finished.")

    @rpc_method
    @rpc_threaded
    def execute_job(self, job_dict):
        """RPC to ask the worker to execute a job.

        job_dict (dict): the job, in the form accepted by
            Job.import_from_dict_with_type.

        return: the job, after the task type executed it, as given
            by its export_to_dict().

        raise: JobException if the worker is already busy with
            another job or if anything fails during the execution.

        """
        job = Job.import_from_dict_with_type(job_dict)

        # Non-blocking acquire: a busy worker declines immediately.
        if not self.work_lock.acquire(False):
            err_msg = "Request '%s' received, " \
                "but declined because of acquired lock" % \
                (job.info,)
            logger.warning(err_msg)
            raise JobException(err_msg)

        try:
            logger.operation = "job '%s'" % (job.info,)
            logger.info("Request received")
            job.shard = self.shard

            self.task_type = get_task_type(job, self.file_cacher)
            self.task_type.execute_job()
            logger.info("Request finished.")

            return job.export_to_dict()

        except Exception:
            # Only real errors are turned into a JobException;
            # KeyboardInterrupt and SystemExit are left alone.
            err_msg = "Worker failed on operation `%s'" % logger.operation
            logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
            raise JobException(err_msg)

        finally:
            self.task_type = None
            self.session = None
            logger.operation = ""
            self.work_lock.release()
class SpoolExporter:
    """This service creates a tree structure "similar" to the one used
    in Italian IOI repository for storing the results of a contest.

    """

    def __init__(self, contest_id, spool_dir):
        """Remember what to export and where to put it.

        contest_id: id of the contest to export.
        spool_dir (string): directory that will be created and filled;
            source files go in its "upload" subdirectory.

        """
        self.contest_id = contest_id
        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(self.spool_dir, "upload")
        self.contest = None
        # Submissions of non-hidden users sorted by timestamp; filled
        # in by do_export.
        self.submissions = None

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        return (bool): False if the spool directory could not be
            created or if exporting raised an exception, True
            otherwise.

        """
        logger.operation = "exporting contest %s" % self.contest_id
        logger.info("Starting export.")

        logger.info("Creating dir structure.")
        try:
            os.mkdir(self.spool_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False
        os.mkdir(self.upload_dir)

        with SessionGen(commit=False) as session:
            self.contest = Contest.get_from_id(self.contest_id, session)
            self.submissions = sorted(
                (submission
                 for submission in self.contest.get_submissions()
                 if not submission.user.hidden),
                key=lambda submission: submission.timestamp)

            # Creating users' directory.
            for user in self.contest.users:
                if not user.hidden:
                    os.mkdir(os.path.join(self.upload_dir, user.username))

            try:
                self.export_submissions()
                self.export_ranking()
            except Exception as error:
                logger.critical("Generic error. %r" % error)
                return False

        logger.info("Export finished.")
        logger.operation = ""

        return True

    @staticmethod
    def _write_lines(path, lines):
        """Write each of lines, newline-terminated, to path as UTF-8.

        The file is closed even if writing fails half way.

        """
        with codecs.open(path, "w", encoding="utf-8") as output:
            for line in lines:
                output.write(line + "\n")

    def _export_results(self, submission):
        """Write the two .res files of an evaluated submission.

        One file has the timestamp in its name, the other has not
        (and is therefore overwritten by later submissions of the
        same user for the same task and language).

        """
        username = submission.user.username
        task = submission.task.name
        language = submission.language

        lines = []
        total = 0.0
        for num, evaluation in enumerate(submission.evaluations):
            outcome = float(evaluation.outcome)
            total += outcome
            lines.append("Executing on file n. %2d %s (%.4f)" %
                         (num, evaluation.text, outcome))
        lines.append("Score: %.6f" % total)

        self._write_lines(
            os.path.join(self.spool_dir, "%d.%s.%s.%s.res" %
                         (submission.timestamp, username, task, language)),
            lines)
        self._write_lines(
            os.path.join(self.spool_dir, "%s.%s.%s.res" %
                         (username, task, language)),
            lines)

    def export_submissions(self):
        """Export submissions' source files.

        For each submission in self.submissions the source is saved
        twice under the user's upload directory (with and without the
        timestamp in the name), a line is appended to the "queue" file
        and, if the submission has been evaluated, its result files
        are written.

        """
        logger.info("Exporting submissions.")

        queue_path = os.path.join(self.spool_dir, "queue")
        with codecs.open(queue_path, "w", encoding="utf-8") as queue_file:
            for submission in self.submissions:
                logger.info("Exporting submission %s." % submission.id)
                username = submission.user.username
                task = submission.task.name
                timestamp = submission.timestamp
                language = submission.language

                # Get source files to the spool directory.
                file_digest = \
                    submission.files["%s.%s" % (task, "%l")].digest
                user_dir = os.path.join(self.upload_dir, username)
                self.file_cacher.get_file(
                    file_digest,
                    path=os.path.join(
                        user_dir, "%s.%d.%s" % (task, timestamp, language)))
                self.file_cacher.get_file(
                    file_digest,
                    path=os.path.join(user_dir, "%s.%s" % (task, language)))
                queue_file.write("./upload/%s/%s.%d.%s\n" %
                                 (username, task, timestamp, language))

                # Write results file for the submission.
                if submission.evaluated():
                    self._export_results(submission)

            # The queue ends with an empty line.
            queue_file.write("\n")

    def export_ranking(self):
        """Exports the ranking in csv and txt (human-readable) form.

        The score of a user on a task is the maximum among the score
        of their last submission and those of their tokened
        submissions, as computed by the task's score type. Only
        non-hidden users are listed, in decreasing order of
        (total score, username).

        """
        logger.info("Exporting ranking.")

        tasks = self.contest.tasks
        usernames = [user.username for user in self.contest.users
                     if not user.hidden]

        # Create the structure to store the scores.
        scores = dict.fromkeys(usernames, 0.0)
        task_scores = dict((task.id, dict.fromkeys(usernames, 0.0))
                           for task in tasks)
        last_scores = dict((task.id, dict.fromkeys(usernames, 0.0))
                           for task in tasks)

        # Make the score type compute the scores.
        scorers = dict((task.id, get_score_type(task=task))
                       for task in tasks)
        for submission in self.submissions:
            scorers[submission.task_id].add_submission(
                submission.id, submission.timestamp,
                submission.user.username,
                dict((ev.num, float(ev.outcome))
                     for ev in submission.evaluations),
                submission.tokened())

        # Put together all the scores. Submissions are sorted by
        # timestamp, hence last_scores ends up with the latest one.
        for submission in self.submissions:
            task_id = submission.task_id
            username = submission.user.username
            details = scorers[task_id].pool[submission.id]
            last_scores[task_id][username] = details["score"]
            if details["tokened"]:
                task_scores[task_id][username] = max(
                    task_scores[task_id][username],
                    details["score"])

        # Merge tokened and last submissions.
        for username in scores:
            for task_id in task_scores:
                task_scores[task_id][username] = max(
                    task_scores[task_id][username],
                    last_scores[task_id][username])
            scores[username] = sum(task_scores[task_id][username]
                                   for task_id in task_scores)

        sorted_usernames = sorted(
            scores.keys(),
            key=lambda username: (scores[username], username),
            reverse=True)
        sorted_tasks = sorted(tasks, key=lambda task: task.num)
        n_tasks = len(sorted_tasks)

        # Rankings' header.
        task_names = tuple(task.name for task in sorted_tasks)
        txt_lines = [
            "Classifica finale del contest `%s'" % self.contest.description,
            ("%20s %10s" % ("Utente", "Totale")) +
            ((" %10s" * n_tasks) % task_names),
        ]
        csv_lines = [
            ("%s,%s" % ("utente", "totale")) +
            ((",%s" * n_tasks) % task_names),
        ]

        # Rankings' content.
        points_line = " %10.3f" * n_tasks
        csv_points_line = ",%.6f" * n_tasks
        for username in sorted_usernames:
            user_scores = tuple(task_scores[task.id][username]
                                for task in sorted_tasks)
            txt_lines.append(
                ("%20s %10.3f" % (username, scores[username])) +
                (points_line % user_scores))
            csv_lines.append(
                ("%s,%.6f" % (username, scores[username])) +
                (csv_points_line % user_scores))

        self._write_lines(
            os.path.join(self.spool_dir, "classifica.txt"), txt_lines)
        self._write_lines(
            os.path.join(self.spool_dir, "classifica.csv"), csv_lines)
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        """Set up logging, the base Service and the worker's state.

        shard: the shard of this worker, forwarded to ServiceCoord
            and to Service.__init__.

        """
        logger.initialize(ServiceCoord("Worker", shard))
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # The task type of the job being executed, None when idle.
        self.task_type = None
        # Held for the whole duration of a job: one job at a time.
        self.work_lock = threading.Lock()
        # Database session of the job being executed, None when idle.
        self.session = None

    def get_submission_data(self, submission_id):
        """Given the id, returns the submission object and a new task
        type object of the correct type.

        The submission is looked up in self.session.

        submission_id (int): id of the submission.

        return (Submission, TaskType): corresponding objects.

        raise: JobException if id or task type not found.

        """
        submission = Submission.get_from_id(submission_id, self.session)
        if submission is None:
            err_msg = "Couldn't find submission %s " \
                      "in the database." % submission_id
            logger.critical(err_msg)
            raise JobException(err_msg)

        try:
            task_type = get_task_type(submission, self.file_cacher)
        except KeyError as error:
            err_msg = "Task type `%s' not known for " \
                "submission %s (error: %s)." % (
                    submission.task.task_type, submission_id, error)
            logger.error(err_msg)
            raise JobException(err_msg)

        return (submission, task_type)

    @rpc_method
    def ignore_job(self):
        """RPC that inform the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the result are
        inconsistent.

        """
        # We inform the task_type to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        try:
            self.task_type.ignore_job = True
        except AttributeError:
            pass  # Job concluded right under our nose, that's ok too.

    @rpc_method
    @rpc_threaded
    def compile(self, submission_id):
        """RPC to ask the worker to compile the submission.

        submission_id (int): the id of the submission to compile.

        """
        return self.action(submission_id, Worker.JOB_TYPE_COMPILATION)

    @rpc_method
    @rpc_threaded
    def evaluate(self, submission_id):
        """RPC to ask the worker to evaluate the submission.

        submission_id (int): the id of the submission to evaluate.

        """
        return self.action(submission_id, Worker.JOB_TYPE_EVALUATION)

    # NOTE(review): a FIXME used to say here that rpc_threaded was
    # disabled because it made the call fail; the decorator is
    # applied below, so that remark looks stale - please confirm.
    @rpc_method
    @rpc_threaded
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        Submissions' files are skipped.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True):
                self.file_cacher.get_file(digest)
        logger.info("Precaching finished.")

    def action(self, submission_id, job_type):
        """The actual work - that can be compilation or evaluation
        (the code is pretty much the same, the differencies are in
        what we ask TaskType to do).

        submission_id (string): the submission to which act on.
        job_type (string): a constant JOB_TYPE_*.

        return: what the task type's compile() or evaluate() returns,
            or False if the worker is already busy with another job.

        raise: JobException if anything fails while working.

        """
        # Non-blocking acquire: a busy worker declines immediately.
        if not self.work_lock.acquire(False):
            logger.warning("Request of %s of submission %s received, "
                           "but declined because of acquired lock" %
                           (job_type, submission_id))
            return False

        try:
            logger.operation = "%s of submission %s" % (job_type,
                                                        submission_id)
            logger.info("Request received: %s of submission %s." %
                        (job_type, submission_id))

            with SessionGen(commit=False) as self.session:

                # Retrieve submission and task_type.
                unused_submission, self.task_type = \
                    self.get_submission_data(submission_id)

                # Store in the task type the shard number.
                self.task_type.worker_shard = self.shard

                # Do the actual work.
                if job_type == Worker.JOB_TYPE_COMPILATION:
                    task_type_action = self.task_type.compile
                elif job_type == Worker.JOB_TYPE_EVALUATION:
                    task_type_action = self.task_type.evaluate
                else:
                    raise KeyError("Unexpected job type %s." % job_type)

                # Run the action first, so that the log line below
                # is written only once the work is really over.
                result = task_type_action()
                logger.info("Request finished.")
                return result

        except Exception:
            # Only real errors are turned into a JobException;
            # KeyboardInterrupt and SystemExit are left alone.
            err_msg = "Worker failed on operation `%s'" % logger.operation
            logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
            raise JobException(err_msg)

        finally:
            self.task_type = None
            self.session = None
            logger.operation = ""
            self.work_lock.release()
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """

    def __init__(self, contest_id, export_target,
                 skip_submissions, skip_user_tests, light):
        """Store the export options and choose the export target.

        contest_id (int): the id of the contest to export.
        export_target (string): where to export to; if empty,
            "dump_<name of the contest>.tar.gz" is used.
        skip_submissions (bool): passed to Contest.enumerate_files;
            also makes export_object leave out the relationships
            towards Submission.
        skip_user_tests (bool): passed to Contest.enumerate_files.
        light (bool): passed to Contest.enumerate_files.

        """
        self.contest_id = contest_id
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.light = light

        # State of the JSON export (see get_id); both are set anew
        # each time the contest is serialized.
        self.ids = {}
        self.queue = []

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        When the target is an archive we can write, the export is
        built in a temporary directory, packed into the archive and
        the temporary directory is removed (also on failure).
        Otherwise the target itself is the export directory.

        return (bool): True if the export succeeded, False if the
            target already exists or a file could not be retrieved.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)
        to_archive = archive_info["write_mode"] != ""
        temp_root = None

        if to_archive:
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            temp_root = tempfile.mkdtemp()
            export_dir = os.path.join(temp_root, archive_info["basename"])

        try:
            if not self._export_to_directory(export_dir):
                return False

            # If the admin requested export to file, we do that.
            if to_archive:
                archive = tarfile.open(self.export_target,
                                       archive_info["write_mode"])
                try:
                    archive.add(export_dir,
                                arcname=archive_info["basename"])
                finally:
                    archive.close()
        finally:
            # Remove the whole temporary tree (not just export_dir),
            # whatever the outcome, so that nothing is left behind.
            if temp_root is not None:
                shutil.rmtree(temp_root, ignore_errors=True)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def _export_to_directory(self, export_dir):
        """Create export_dir and write files and contest.json in it.

        export_dir (string): the directory to create and fill.

        return (bool): False if export_dir could not be created or a
            file could not be retrieved, True otherwise.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            self.skip_user_tests,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file,
                                          os.path.join(files_dir, _file),
                                          os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")

            self.ids = {contest: "0"}
            self.queue = [contest]

            data = dict()
            # export_object appends newly found objects to the queue
            # (through get_id), hence the explicit index.
            i = 0
            while i < len(self.queue):
                obj = self.queue[i]
                data[self.ids[obj]] = self.export_object(obj)
                i += 1

            with open(os.path.join(export_dir,
                                   "contest.json"), 'w') as fout:
                json.dump(data, fout, indent=4, sort_keys=True,
                          item_sort_key=self._numeric_sort_key)

        return True

    @staticmethod
    def _numeric_sort_key(x):
        """Return the key to sort the items of the JSON output with.

        x: a value or a tuple/list (of which only the first element
            is considered).

        return: the value converted to int if possible, else as is.

        """
        try:
            if isinstance(x, (tuple, list)):
                x = x[0]
            x = int(x)
        except (TypeError, ValueError, IndexError):
            pass
        return x

    def get_id(self, obj):
        """Return the exported ID of obj, generating it if needed.

        An object seen for the first time gets the next free ID and
        is appended to the queue of the objects to export.

        obj: the object to get the ID of.

        return (string): the ID of obj.

        """
        if obj not in self.ids:
            # We use strings because they'll be the keys of a JSON object
            self.ids[obj] = str(len(self.ids))
            self.queue.append(obj)

        return self.ids[obj]

    def export_object(self, obj):
        """Export the given object, returning a JSON-encodable dict

        The returned dict will contain a "_class" item (the name of the
        class of the given object), an item for each column property
        (with a value properly translated to a JSON-compatible type)
        and an item for each relationship property (which will be an ID
        or a collection of IDs).

        The IDs used in the exported dict aren't related to the ones
        used in the DB: they are newly generated and their scope is
        limited to the exported file only. They are shared among all
        classes (that is, two objects can never share the same ID, even
        if they are of different classes). The contest will have ID 0.

        If, when exporting the relationship, we find an object without
        an ID we generate a new ID, assign it to the object and append
        the object to the queue of objects to export.

        The self.skip_submissions flag controls whether we export
        submissions (and all other objects that can be reached only by
        passing through a submission) or not.

        raise: RuntimeError if a column or a relationship has a type
            we don't know how to export.

        """
        cls = type(obj)

        data = {"_class": cls.__name__}

        for prp in cls._col_props:
            col = prp.columns[0]
            col_type = type(col.type)

            val = getattr(obj, prp.key)
            if col_type in [Boolean, Integer, Float, String]:
                data[prp.key] = val
            elif col_type is DateTime:
                data[prp.key] = \
                    make_timestamp(val) if val is not None else None
            elif col_type is Interval:
                data[prp.key] = \
                    val.total_seconds() if val is not None else None
            else:
                raise RuntimeError("Unknown SQLAlchemy column type: %s"
                                   % col_type)

        for prp in cls._rel_props:
            other_cls = prp.mapper.class_

            # Skip submissions if requested
            if self.skip_submissions and other_cls is Submission:
                continue

            val = getattr(obj, prp.key)
            if val is None:
                data[prp.key] = None
            elif isinstance(val, other_cls):
                data[prp.key] = self.get_id(val)
            elif isinstance(val, list):
                data[prp.key] = [self.get_id(i) for i in val]
            elif isinstance(val, dict):
                data[prp.key] = dict((k, self.get_id(v))
                                     for k, v in val.iteritems())
            else:
                raise RuntimeError(
                    "Unknown SQLAlchemy relationship type on %s: %s"
                    % (prp.key, type(val)))

        return data

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)."
                         % (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has wrong hash %s."
                            % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with codecs.open(descr_path, 'w', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    """

    def __init__(self, contest_id, export_target,
                 skip_submissions, skip_user_tests, light):
        """Store the export options and choose the export target.

        contest_id (int): the id of the contest to export.
        export_target (string): where to export to; if empty,
            "dump_<name of the contest>.tar.gz" is used.
        skip_submissions (bool): passed to Contest.enumerate_files
            and to Contest.export_to_dict.
        skip_user_tests (bool): passed to Contest.enumerate_files
            and to Contest.export_to_dict.
        light (bool): passed to Contest.enumerate_files.

        """
        self.contest_id = contest_id
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests
        self.light = light

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        When the target is an archive we can write, the export is
        built in a temporary directory, packed into the archive and
        the temporary directory is removed (also on failure).
        Otherwise the target itself is the export directory.

        return (bool): True if the export succeeded, False if the
            target already exists or a file could not be retrieved.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        archive_info = get_archive_info(self.export_target)
        to_archive = archive_info["write_mode"] != ""
        temp_root = None

        if to_archive:
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.error("The specified file already exists, "
                             "I won't overwrite it.")
                return False
            temp_root = tempfile.mkdtemp()
            export_dir = os.path.join(temp_root, archive_info["basename"])

        try:
            if not self._export_to_directory(export_dir):
                return False

            # If the admin requested export to file, we do that.
            if to_archive:
                archive = tarfile.open(self.export_target,
                                       archive_info["write_mode"])
                try:
                    archive.add(export_dir,
                                arcname=archive_info["basename"])
                finally:
                    archive.close()
        finally:
            # Remove the whole temporary tree (not just export_dir),
            # whatever the outcome, so that nothing is left behind.
            if temp_root is not None:
                shutil.rmtree(temp_root, ignore_errors=True)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def _export_to_directory(self, export_dir):
        """Create export_dir and write files and contest.json in it.

        export_dir (string): the directory to create and fill.

        return (bool): False if export_dir could not be created or a
            file could not be retrieved, True otherwise.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            self.skip_user_tests,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file,
                                          os.path.join(files_dir, _file),
                                          os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")
            with open(os.path.join(export_dir,
                                   "contest.json"), 'w') as fout:
                json.dump(contest.export_to_dict(
                              self.skip_submissions,
                              self.skip_user_tests),
                          fout, indent=4)

        return True

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)."
                         % (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.error("File %s has wrong hash %s."
                         % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with codecs.open(descr_path, 'w', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
class SpoolExporter:
    """This service creates a tree structure "similar" to the one used
    in Italian IOI repository for storing the results of a contest.

    """

    def __init__(self, contest_id, spool_dir):
        """Remember what to export and where to put it.

        contest_id: id of the contest to export.
        spool_dir (string): directory that will be created and filled;
            source files go in its "upload" subdirectory.

        """
        self.contest_id = contest_id
        self.spool_dir = spool_dir
        self.upload_dir = os.path.join(self.spool_dir, "upload")
        self.contest = None
        # Submissions of non-hidden users sorted by timestamp; filled
        # in by do_export.
        self.submissions = None

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        return (bool): False if the spool directory could not be
            created or if exporting raised an exception, True
            otherwise.

        """
        logger.operation = "exporting contest %s" % self.contest_id
        logger.info("Starting export.")

        logger.info("Creating dir structure.")
        try:
            os.mkdir(self.spool_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False
        os.mkdir(self.upload_dir)

        with SessionGen(commit=False) as session:
            self.contest = Contest.get_from_id(self.contest_id, session)
            self.submissions = sorted(
                (submission
                 for submission in self.contest.get_submissions()
                 if not submission.user.hidden),
                key=lambda submission: submission.timestamp,
            )

            # Creating users' directory.
            for user in self.contest.users:
                if not user.hidden:
                    os.mkdir(os.path.join(self.upload_dir, user.username))

            try:
                self.export_submissions()
                self.export_ranking()
            except Exception as error:
                logger.critical("Generic error. %r" % error)
                return False

        logger.info("Export finished.")
        logger.operation = ""

        return True

    @staticmethod
    def _write_lines(path, lines):
        """Write each of lines, newline-terminated, to path as UTF-8.

        The file is closed even if writing fails half way.

        """
        with codecs.open(path, "w", encoding="utf-8") as output:
            for line in lines:
                output.write(line + "\n")

    def _export_results(self, submission):
        """Write the two .res files of an evaluated submission.

        One file has the timestamp in its name, the other has not
        (and is therefore overwritten by later submissions of the
        same user for the same task and language).

        """
        username = submission.user.username
        task = submission.task.name
        language = submission.language

        lines = []
        total = 0.0
        for num, evaluation in enumerate(submission.evaluations):
            outcome = float(evaluation.outcome)
            total += outcome
            lines.append("Executing on file n. %2d %s (%.4f)" %
                         (num, evaluation.text, outcome))
        lines.append("Score: %.6f" % total)

        self._write_lines(
            os.path.join(self.spool_dir, "%d.%s.%s.%s.res" %
                         (submission.timestamp, username, task, language)),
            lines)
        self._write_lines(
            os.path.join(self.spool_dir, "%s.%s.%s.res" %
                         (username, task, language)),
            lines)

    def export_submissions(self):
        """Export submissions' source files.

        For each submission in self.submissions the source is saved
        twice under the user's upload directory (with and without the
        timestamp in the name), a line is appended to the "queue" file
        and, if the submission has been evaluated, its result files
        are written.

        """
        logger.info("Exporting submissions.")

        queue_path = os.path.join(self.spool_dir, "queue")
        with codecs.open(queue_path, "w", encoding="utf-8") as queue_file:
            for submission in self.submissions:
                logger.info("Exporting submission %s." % submission.id)
                username = submission.user.username
                task = submission.task.name
                timestamp = submission.timestamp
                language = submission.language

                # Get source files to the spool directory.
                file_digest = \
                    submission.files["%s.%s" % (task, "%l")].digest
                user_dir = os.path.join(self.upload_dir, username)
                self.file_cacher.get_file(
                    file_digest,
                    path=os.path.join(
                        user_dir, "%s.%d.%s" % (task, timestamp, language)))
                self.file_cacher.get_file(
                    file_digest,
                    path=os.path.join(user_dir, "%s.%s" % (task, language)))
                queue_file.write("./upload/%s/%s.%d.%s\n" %
                                 (username, task, timestamp, language))

                # Write results file for the submission.
                if submission.evaluated():
                    self._export_results(submission)

            # The queue ends with an empty line.
            queue_file.write("\n")

    def export_ranking(self):
        """Exports the ranking in csv and txt (human-readable) form.

        The score of a user on a task is the maximum among the score
        of their last submission and those of their tokened
        submissions, as computed by the score type of the task's
        active dataset. Only non-hidden users are listed, in
        decreasing order of (total score, username).

        """
        logger.info("Exporting ranking.")

        tasks = self.contest.tasks
        usernames = [user.username for user in self.contest.users
                     if not user.hidden]

        # Create the structure to store the scores.
        scores = dict.fromkeys(usernames, 0.0)
        task_scores = dict((task.id, dict.fromkeys(usernames, 0.0))
                           for task in tasks)
        last_scores = dict((task.id, dict.fromkeys(usernames, 0.0))
                           for task in tasks)

        # Make the score type compute the scores.
        scorers = dict(
            (task.id, get_score_type(dataset=task.active_dataset))
            for task in tasks)
        for submission in self.submissions:
            scorers[submission.task_id].add_submission(
                submission.id,
                submission.timestamp,
                submission.user.username,
                dict((ev.num, float(ev.outcome))
                     for ev in submission.evaluations),
                submission.tokened(),
            )

        # Put together all the scores. Submissions are sorted by
        # timestamp, hence last_scores ends up with the latest one.
        for submission in self.submissions:
            task_id = submission.task_id
            username = submission.user.username
            details = scorers[task_id].pool[submission.id]
            last_scores[task_id][username] = details["score"]
            if details["tokened"]:
                task_scores[task_id][username] = max(
                    task_scores[task_id][username],
                    details["score"])

        # Merge tokened and last submissions.
        for username in scores:
            for task_id in task_scores:
                task_scores[task_id][username] = max(
                    task_scores[task_id][username],
                    last_scores[task_id][username])
            scores[username] = sum(task_scores[task_id][username]
                                   for task_id in task_scores)

        sorted_usernames = sorted(
            scores.keys(),
            key=lambda username: (scores[username], username),
            reverse=True)
        sorted_tasks = sorted(tasks, key=lambda task: task.num)
        n_tasks = len(sorted_tasks)

        # Rankings' header.
        task_names = tuple(task.name for task in sorted_tasks)
        txt_lines = [
            "Classifica finale del contest `%s'" % self.contest.description,
            ("%20s %10s" % ("Utente", "Totale")) +
            ((" %10s" * n_tasks) % task_names),
        ]
        csv_lines = [
            ("%s,%s" % ("utente", "totale")) +
            ((",%s" * n_tasks) % task_names),
        ]

        # Rankings' content.
        points_line = " %10.3f" * n_tasks
        csv_points_line = ",%.6f" * n_tasks
        for username in sorted_usernames:
            user_scores = tuple(task_scores[task.id][username]
                                for task in sorted_tasks)
            txt_lines.append(
                ("%20s %10.3f" % (username, scores[username])) +
                (points_line % user_scores))
            csv_lines.append(
                ("%s,%.6f" % (username, scores[username])) +
                (csv_points_line % user_scores))

        self._write_lines(
            os.path.join(self.spool_dir, "classifica.txt"), txt_lines)
        self._write_lines(
            os.path.join(self.spool_dir, "classifica.csv"), csv_lines)
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    The data is written either to a plain directory or, when
    get_archive_info reports a non-empty write mode for the target,
    to a temporary directory that is then packed into an archive.

    """

    def __init__(self, contest_id, dump, export_target,
                 skip_submissions, light):
        """Store the export options and settle the export target.

        contest_id (int): the id of the contest to export.
        dump (bool): whether to also dump the SQL database.
        export_target (string): the directory or archive to create;
            if empty, "dump_NAME.tar.gz" is used, where NAME is the
            name of the contest.
        skip_submissions (bool): forwarded to Contest.enumerate_files
            and Contest.export_to_dict.
        light (bool): forwarded to Contest.enumerate_files.

        """
        self.contest_id = contest_id
        self.dump = dump
        self.skip_submissions = skip_submissions
        self.light = light

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def run(self):
        """Interface to make the class do its job."""
        return self.do_export()

    def do_export(self):
        """Run the actual export code.

        return (bool): True if the export succeeded, False otherwise.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        # Set only when we export through a temporary directory.
        temp_dir = None
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.error("The specified file already exists, "
                             "I won't overwrite it.")
                return False
            temp_dir = tempfile.mkdtemp()
            export_dir = os.path.join(temp_dir, archive_info["basename"])

        try:
            if not self._export_to_dir(export_dir):
                return False

            # If the admin requested export to file, we do that.
            if temp_dir is not None:
                archive = tarfile.open(self.export_target,
                                       archive_info["write_mode"])
                try:
                    archive.add(export_dir,
                                arcname=archive_info["basename"])
                finally:
                    archive.close()
        finally:
            # mkdtemp leaves deletion to the caller: drop the whole
            # temporary tree, whether the export succeeded or not.
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def _export_to_dir(self, export_dir):
        """Create export_dir and fill it with the exported data.

        export_dir (string): the directory to create and populate.

        return (bool): True if all ok, False if something wrong.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.error("The specified directory already exists, "
                         "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            logger.info("Exporting files.")
            files = contest.enumerate_files(self.skip_submissions,
                                            light=self.light)
            for _file in files:
                if not self.safe_get_file(_file,
                                          os.path.join(files_dir, _file),
                                          os.path.join(descr_dir, _file)):
                    return False

            # Export the contest in JSON format.
            logger.info("Exporting the contest in JSON format.")
            with open(os.path.join(export_dir, "contest.json"),
                      'w') as fout:
                json.dump(contest.export_to_dict(self.skip_submissions),
                          fout, indent=4)

        if self.dump:
            if not self.dump_database(export_dir):
                return False

        return True

    @staticmethod
    def _pg_dump_args(host, username, database):
        """Return the pg_dump command line as a list of arguments.

        host (string): the database host.
        username (string): the database user.
        database (string): the name of the database.

        return ([string]): the arguments, program name included.

        """
        return ["pg_dump", "-h", host, "-U", username, "-w", database,
                "-x", "--attribute-inserts"]

    @staticmethod
    def _run_to_file(args, out_path, env=None):
        """Run a command sending its standard output to a file.

        args ([string]): the command to run, program name included.
        out_path (string): the file receiving the standard output.
        env (dict): the environment for the command, or None to
            inherit the current one.

        return (int): the exit code of the command, or 1 if it could
            not be started or the output file could not be opened.

        """
        # Imported here because only the database dump needs it.
        import subprocess

        try:
            with open(out_path, "wb") as fout:
                return subprocess.call(args, stdout=fout, env=env)
        except (OSError, IOError) as error:
            logger.critical("Cannot run %s (%r)." % (args[0], error))
            return 1

    def dump_database(self, export_dir):
        """Dump the whole database. This is never used; however, this
        part is retained for historical reasons.

        export_dir (string): the directory where "database_dump.sql"
            is written.

        return (bool): True if all ok, False if something wrong.

        """
        # Warning: this part depends on the specific database used.
        logger.info("Dumping SQL database.")
        (engine, connection) = config.database.split(':', 1)
        db_exportfile = os.path.join(export_dir, "database_dump.sql")

        # Export procedure for PostgreSQL.
        if engine == 'postgresql':
            db_match = re.match(r'//(\w*):(\w*)@(\w*)/(\w*)', connection)
            if db_match is None:
                logger.critical("Cannot obtain parameters for "
                                "database connection.")
                return False
            username, password, host, database = db_match.groups()
            # The password travels in a private copy of the
            # environment, so os.environ is never modified.
            env = dict(os.environ)
            env['PGPASSWORD'] = password
            export_res = self._run_to_file(
                self._pg_dump_args(host, username, database),
                db_exportfile, env)

        # Export procedure for SQLite.
        elif engine == 'sqlite':
            db_match = re.match(r'///(.*)', connection)
            if db_match is None:
                logger.critical("Cannot obtain parameters for "
                                "database connection.")
                return False
            dbfile, = db_match.groups()
            export_res = self._run_to_file(["sqlite3", dbfile, ".dump"],
                                           db_exportfile)

        else:
            logger.critical("Database engine not supported. :-(")
            return False

        if export_res != 0:
            logger.critical("Database export failed.")
            return False

        return True

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)."
                         % (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.error("File %s has wrong hash %s."
                         % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with open(descr_path, 'w') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
class TestFileCacher(TestService):
    """Service that performs automatically some tests for the
    FileCacher service.

    The test_NNN methods share state through instance attributes
    (content, digest, cache_path, ...): each test relies on what the
    previous ones stored there.

    """

    def __init__(self, shard):
        logger.initialize(ServiceCoord("TestFileCacher", shard))
        TestService.__init__(self, shard, custom_logger=logger)

        # Assume we store the cache in "./cache/fs-cache-TestFileCacher-0/"
        self.cache_base_path = os.path.join(config.cache_dir,
                                            "fs-cache-TestFileCacher-0")
        self.cache_path = None
        self.content = None
        self.fake_content = None
        self.digest = None
        self.file_obj = None
        # Set by test_000 and read back by test_002.
        self.size = None
        self.file_cacher = FileCacher(self)

    def prepare(self):
        """Initialization for the test code - make sure that the cache
        is empty before testing.

        """
        logger.info("Please delete directory %s before." %
                    self.cache_base_path)

    def _object_path(self, digest):
        """Return where the local cache is expected to keep digest."""
        return os.path.join(self.cache_base_path, "objects", digest)

    @staticmethod
    def _read_binary(path):
        """Return the whole content of path, closing the file."""
        with open(path, "rb") as file_:
            return file_.read()

    def _check_stored(self, digest):
        """End the test according to whether self.content was cached.

        On success the digest and the path of the cached copy are
        remembered for the following tests.

        digest (string): the digest returned by put_file.

        """
        path = self._object_path(digest)
        if not os.path.exists(path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(path) != self.content:
            self.test_end(False, "Local cache's content differ "
                          "from original file.")
        else:
            self.cache_path = path
            self.digest = digest
            self.test_end(True, "Data sent and cached without error.")

    def _check_served_from_cache(self, received, success_message):
        """End the test according to whether the fake content, planted
        in the cache, is what FileCacher gave back.

        received (string): the data obtained from FileCacher.
        success_message (string): the message to report on success.

        """
        if received == self.fake_content:
            self.test_end(True, success_message)
        elif received == self.content:
            self.test_end(False,
                          "Did not use the cache even if it could.")
        else:
            self.test_end(False, "Content differ.")

    ### TEST 000 ###

    def test_000(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a file-like object. FC should cache the content
        locally.

        """
        self.size = 100
        self.content = "".join(chr(random.randint(0, 255))
                               for unused_i in xrange(self.size))

        logger.info(" I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file(
                file_obj=StringIO(self.content),
                description="Test #000")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            # Nothing was stored: there is no digest to check.
            return

        self._check_stored(data)

    ### TEST 001 ###

    def test_001(self):
        """Retrieve the file.

        The cached copy is overwritten first, so that getting the
        fake content back proves that the cache was used.

        """
        logger.info(" I am retrieving the ~100B binary file from FileCacher")
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             temp_file_obj=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        try:
            received = data.read()
        finally:
            data.close()

        self._check_served_from_cache(received,
                                      "Data object received correctly.")

    ### TEST 002 ###

    def test_002(self):
        """Check the size of the file.

        """
        logger.info(" I am checking the size of the ~100B binary file")
        try:
            size = self.file_cacher.get_size(self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        if size == self.size:
            self.test_end(True, "The size is correct.")
        else:
            self.test_end(False, "The size is wrong: %d instead of %d" %
                          (size, self.size))

    ### TEST 003 ###

    def test_003(self):
        """Get file from FileCacher.

        """
        logger.info(" I am retrieving the file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             temp_file_obj=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        try:
            received = data.read()
        finally:
            data.close()

        if received != self.content:
            self.test_end(False, "Content differ.")
        elif not os.path.exists(self.cache_path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(self.cache_path) != self.content:
            self.test_end(False, "Local cache's content differ " +
                          "from original file.")
        else:
            self.test_end(True, "Content object received " +
                          "and cached correctly.")

    ### TEST 004 ###

    def test_004(self):
        """Delete the file through FS and tries to get it again
        through FC.

        """
        logger.info(" I am deleting the file from FileCacher.")
        try:
            self.file_cacher.delete(digest=self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %s." % error)
            # Already failed: do not report a second verdict.
            return

        logger.info(" File deleted correctly.")
        logger.info(" I am getting the file from FileCacher.")
        try:
            self.file_cacher.get_file(digest=self.digest)
        except Exception as error:
            self.test_end(True, "Correctly received an error: %r." % error)
        else:
            self.test_end(False, "Did not receive error.")

    ### TEST 005 ###

    def test_005(self):
        """Get unexisting file from FileCacher.

        """
        logger.info(" I am retrieving an unexisting file from FileCacher.")
        try:
            self.file_cacher.get_file(digest=self.digest,
                                      temp_file_obj=True)
        except Exception as error:
            self.test_end(True, "Correctly received an error: %r." % error)
        else:
            self.test_end(False, "Did not receive error.")

    ### TEST 006 ###

    def test_006(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a string. FC should cache the content locally.

        """
        self.content = "".join(chr(random.randint(0, 255))
                               for unused_i in xrange(100))

        logger.info(" I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file(binary_data=self.content,
                                             description="Test #005")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        self._check_stored(data)

    ### TEST 007 ###

    def test_007(self):
        """Retrieve the file as a string.

        """
        logger.info(" I am retrieving the ~100B binary file from FileCacher "
                    "using get_file_to_string()")
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             string=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        self._check_served_from_cache(data, "Data received correctly.")

    ### TEST 008 ###

    def test_008(self):
        """Put a ~100MB file into the storage (using a specially
        crafted file-like object).

        """
        logger.info(" I am sending the ~100MB binary file to FileCacher")
        rand_file = RandomFile(100000000)
        try:
            try:
                data = self.file_cacher.put_file(file_obj=rand_file,
                                                 description="Test #007")
            except Exception as error:
                self.test_end(False, "Error received: %r." % error)
                return
            if rand_file.dim != 0:
                self.test_end(False,
                              "The input file wasn't read completely.")
                return
            my_digest = rand_file.digest
        finally:
            rand_file.close()

        if not os.path.exists(self._object_path(data)):
            self.test_end(False, "File not stored in local cache.")
        elif my_digest != data:
            self.test_end(False, "File received with wrong hash.")
        else:
            self.cache_path = self._object_path(data)
            self.digest = data
            self.test_end(True, "Data sent and cached without error.")

    ### TEST 009 ###

    def test_009(self):
        """Get the ~100MB file from FileCacher.

        """
        logger.info(" I am retrieving the ~100MB file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        hash_file = HashingFile()
        try:
            try:
                self.file_cacher.get_file(digest=self.digest,
                                          file_obj=hash_file)
            except Exception as error:
                self.test_end(False, "Error received: %r." % error)
                return
            finally:
                my_digest = hash_file.digest
                hash_file.close()

            if self.digest != my_digest:
                self.test_end(False, "Content differs.")
            elif not os.path.exists(self.cache_path):
                self.test_end(False, "File not stored in local cache.")
            else:
                self.test_end(True, "Content object received " +
                              "and cached correctly.")
        finally:
            # The big file is removed whatever the verdict was.
            self.file_cacher.delete(self.digest)
class Worker(Service):
    """Service that compiles and evaluates submissions in a sandbox.

    How each operation is carried out is decided by the TaskType
    classes; the sandbox itself lives in the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        logger.initialize(ServiceCoord("Worker", shard))
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # The task type of the job being executed, None when idle.
        self.task_type = None
        self.session = None
        # Held for the whole duration of a job.
        self.work_lock = threading.Lock()

    @rpc_method
    def ignore_job(self):
        """RPC telling the worker that the result of the current
        action is going to be discarded.

        The worker tries to return as soon as possible, even if that
        leaves the result inconsistent.

        """
        logger.info("Trying to interrupt job as requested.")
        # The flag is raised on the task type, which is asked to quit
        # as soon as possible.
        try:
            self.task_type.ignore_job = True
        except AttributeError:
            # Job concluded right under our nose, that's ok too.
            pass

    # FIXME - rpc_threaded is disable because it makes the call fail:
    # we should investigate on this
    @rpc_method
    @rpc_threaded
    def precache_files(self, contest_id):
        """RPC asking the worker to precache the files of a contest.

        contest_id (int): the id of the contest

        """
        # No lock is taken: it is not needed as long as the cache and
        # temp directories are on the same filesystem, which is the
        # usual case since both are children of the same, cms-created,
        # directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(contest_id, session)
            digests = contest.enumerate_files(skip_submissions=True)
            for digest in digests:
                self.file_cacher.get_file(digest)
        logger.info("Precaching finished.")

    @rpc_method
    @rpc_threaded
    def execute_job(self, job_dict):
        """RPC executing a single job.

        job_dict (dict): the job, as understood by
            Job.import_from_dict_with_type.

        return (dict): the job after execution, as exported by its
            export_to_dict method.

        raise (JobException): if the worker is already busy or if the
            execution fails.

        """
        job = Job.import_from_dict_with_type(job_dict)

        # Non-blocking attempt: a busy worker declines the request.
        if not self.work_lock.acquire(False):
            err_msg = "Request '%s' received, " \
                "but declined because of acquired lock" % \
                job.info
            logger.warning(err_msg)
            raise JobException(err_msg)

        try:
            logger.operation = "job '%s'" % job.info
            logger.info("Request received")
            job.shard = self.shard

            self.task_type = get_task_type(job, self.file_cacher)
            self.task_type.execute_job()
            logger.info("Request finished.")

            return job.export_to_dict()

        except:
            # Every failure, of any kind, reaches the caller as a
            # JobException.
            failure = "Worker failed on operation `%s'" % logger.operation
            details = traceback.format_exc()
            logger.error("%s\n%s" % (failure, details))
            raise JobException(failure)

        finally:
            self.task_type = None
            self.session = None
            logger.operation = ""
            self.work_lock.release()
class ContestExporter:
    """This service exports every data about the contest that CMS
    knows. The process of exporting and importing again should be
    idempotent.

    The data is written either to a plain directory or, when
    get_archive_info reports a non-empty write mode for the target,
    to a temporary directory that is then packed into an archive.

    """

    def __init__(self, contest_id, export_target,
                 dump_files, dump_model, light,
                 skip_submissions, skip_user_tests):
        """Store the export options and settle the export target.

        contest_id (int): the id of the contest to export.
        export_target (string): the directory or archive to create;
            if empty, "dump_NAME.tar.gz" is used, where NAME is the
            name of the contest.
        dump_files (bool): whether to export the files.
        dump_model (bool): whether to export the contest as JSON.
        light (bool): forwarded to Contest.enumerate_files.
        skip_submissions (bool): whether to leave submissions out.
        skip_user_tests (bool): whether to leave user tests out.

        """
        self.contest_id = contest_id
        self.dump_files = dump_files
        self.dump_model = dump_model
        self.light = light
        self.skip_submissions = skip_submissions
        self.skip_user_tests = skip_user_tests

        # State of the JSON export: the IDs handed out so far and the
        # objects still waiting to be exported. Defined here so that
        # get_id and export_object never find them missing.
        self.ids = {}
        self.queue = []

        # If target is not provided, we use the contest's name.
        if export_target == "":
            with SessionGen(commit=False) as session:
                contest = Contest.get_from_id(self.contest_id, session)
                self.export_target = "dump_%s.tar.gz" % contest.name
                logger.warning("export_target not given, using \"%s\""
                               % self.export_target)
        else:
            self.export_target = export_target

        self.file_cacher = FileCacher()

    def do_export(self):
        """Run the actual export code.

        return (bool): True if the export succeeded, False otherwise.

        """
        logger.operation = "exporting contest %d" % self.contest_id
        logger.info("Starting export.")

        export_dir = self.export_target
        # Set only when we export through a temporary directory.
        temp_dir = None
        archive_info = get_archive_info(self.export_target)

        if archive_info["write_mode"] != "":
            # We are able to write to this archive.
            if os.path.exists(self.export_target):
                logger.critical("The specified file already exists, "
                                "I won't overwrite it.")
                return False
            temp_dir = tempfile.mkdtemp()
            export_dir = os.path.join(temp_dir, archive_info["basename"])

        try:
            if not self._export_to_dir(export_dir):
                return False

            # If the admin requested export to file, we do that.
            if temp_dir is not None:
                archive = tarfile.open(self.export_target,
                                       archive_info["write_mode"])
                try:
                    archive.add(export_dir,
                                arcname=archive_info["basename"])
                finally:
                    archive.close()
        finally:
            # mkdtemp leaves deletion to the caller: drop the whole
            # temporary tree, whether the export succeeded or not.
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)

        logger.info("Export finished.")
        logger.operation = ""

        return True

    def _export_to_dir(self, export_dir):
        """Create export_dir and fill it with the exported data.

        export_dir (string): the directory to create and populate.

        return (bool): True if all ok, False if something wrong.

        """
        logger.info("Creating dir structure.")
        try:
            os.mkdir(export_dir)
        except OSError:
            logger.critical("The specified directory already exists, "
                            "I won't overwrite it.")
            return False

        files_dir = os.path.join(export_dir, "files")
        descr_dir = os.path.join(export_dir, "descriptions")
        os.mkdir(files_dir)
        os.mkdir(descr_dir)

        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(self.contest_id, session)

            # Export files.
            if self.dump_files:
                logger.info("Exporting files.")
                files = contest.enumerate_files(self.skip_submissions,
                                                self.skip_user_tests,
                                                self.light)
                for file_ in files:
                    if not self.safe_get_file(
                            file_,
                            os.path.join(files_dir, file_),
                            os.path.join(descr_dir, file_)):
                        return False

            # Export the contest in JSON format.
            if self.dump_model:
                logger.info("Exporting the contest to a JSON file.")

                # We use strings because they'll be the keys of a JSON
                # object; the contest will have ID 0.
                self.ids = {contest.sa_identity_key: "0"}
                self.queue = [contest]

                data = dict()
                # export_object may append to the queue the objects
                # it meets for the first time.
                while len(self.queue) > 0:
                    obj = self.queue.pop(0)
                    data[self.ids[obj.sa_identity_key]] = \
                        self.export_object(obj)

                # Specify the "root" of the data graph
                data["_objects"] = ["0"]

                with io.open(os.path.join(export_dir, "contest.json"),
                             "wb") as fout:
                    json.dump(data, fout, encoding="utf-8",
                              indent=4, sort_keys=True)

        return True

    def get_id(self, obj):
        """Return the export ID of obj, assigning one if needed.

        An object seen for the first time gets the next free ID and
        is appended to the queue of objects to export.

        obj (object): an object exposing sa_identity_key.

        return (string): the ID of the object in the exported data.

        """
        obj_key = obj.sa_identity_key
        if obj_key not in self.ids:
            # We use strings because they'll be the keys of a JSON object
            self.ids[obj_key] = str(len(self.ids))
            self.queue.append(obj)

        return self.ids[obj_key]

    def export_object(self, obj):
        """Export the given object, returning a JSON-encodable dict.

        The returned dict will contain a "_class" item (the name of
        the class of the given object), an item for each column
        property (with a value properly translated to a JSON-compatible
        type) and an item for each relationship property (which will
        be an ID or a collection of IDs).

        The IDs used in the exported dict aren't related to the ones
        used in the DB: they are newly generated and their scope is
        limited to the exported file only. They are shared among all
        classes (that is, two objects can never share the same ID,
        even if they are of different classes).

        If, when exporting the relationship, we find an object without
        an ID we generate a new ID, assign it to the object and append
        the object to the queue of objects to export.

        The self.skip_submissions flag controls whether we export
        submissions (and all other objects that can be reached only by
        passing through a submission) or not; self.skip_user_tests
        does the same for user tests.

        obj (object): the object to export.

        return (dict): the JSON-encodable description of obj.

        raise (RuntimeError): on a column type or a relationship
            value that is not handled here.

        """
        cls = type(obj)

        data = {"_class": cls.__name__}

        for prp in cls._col_props:
            col, = prp.columns
            col_type = type(col.type)

            val = getattr(obj, prp.key)
            if col_type in (Boolean, Integer, Float, String):
                data[prp.key] = val
            elif col_type is DateTime:
                data[prp.key] = \
                    make_timestamp(val) if val is not None else None
            elif col_type is Interval:
                data[prp.key] = \
                    val.total_seconds() if val is not None else None
            else:
                raise RuntimeError("Unknown SQLAlchemy column type: %s"
                                   % col_type)

        for prp in cls._rel_props:
            other_cls = prp.mapper.class_

            # Skip submissions if requested
            if self.skip_submissions and other_cls is Submission:
                continue

            # Skip user_tests if requested
            if self.skip_user_tests and other_cls is UserTest:
                continue

            val = getattr(obj, prp.key)
            if val is None:
                data[prp.key] = None
            elif isinstance(val, other_cls):
                data[prp.key] = self.get_id(val)
            elif isinstance(val, list):
                data[prp.key] = [self.get_id(i) for i in val]
            elif isinstance(val, dict):
                data[prp.key] = \
                    dict((k, self.get_id(v)) for k, v in val.iteritems())
            else:
                raise RuntimeError("Unknown SQLAlchemy relationship type: %s"
                                   % type(val))

        return data

    def safe_get_file(self, digest, path, descr_path=None):
        """Get file from FileCacher ensuring that the digest is
        correct.

        digest (string): the digest of the file to retrieve.
        path (string): the path where to save the file.
        descr_path (string): the path where to save the description.

        return (bool): True if all ok, False if something wrong.

        """
        # TODO - Probably this method could be merged in FileCacher

        # First get the file
        try:
            self.file_cacher.get_file(digest, path=path)
        except Exception as error:
            logger.error("File %s could not retrieved from file server (%r)."
                         % (digest, error))
            return False

        # Then check the digest
        calc_digest = sha1sum(path)
        if digest != calc_digest:
            logger.critical("File %s has wrong hash %s."
                            % (digest, calc_digest))
            return False

        # If applicable, retrieve also the description
        if descr_path is not None:
            with io.open(descr_path, 'wt', encoding='utf-8') as fout:
                fout.write(self.file_cacher.describe(digest))

        return True
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        logger.initialize(ServiceCoord("Worker", shard))
        Service.__init__(self, shard, custom_logger=logger)
        self.file_cacher = FileCacher(self)

        # Held for the whole duration of a job group.
        self.work_lock = threading.Lock()
        # Whether the result of the current job group was declared
        # useless. The name is private on purpose: an instance
        # attribute called "ignore_job" would shadow the ignore_job
        # RPC method below.
        self._ignore_job = False

    @rpc_method
    def ignore_job(self):
        """RPC that inform the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the result are
        inconsistent.

        """
        # We remember to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        self._ignore_job = True

    # FIXME - rpc_threaded is disable because it makes the call fail:
    # we should investigate on this
    @rpc_method
    @rpc_threaded
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen(commit=False) as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True,
                                                  skip_user_tests=True):
                self.file_cacher.get_file(digest)
        logger.info("Precaching finished.")

    @rpc_method
    @rpc_threaded
    def execute_job_group(self, job_group_dict):
        """RPC executing, one after the other, the jobs of a group.

        The execution stops at the first job that fails, or as soon
        as ignore_job has been called; in both cases the group is
        marked as not successful.

        job_group_dict (dict): the job group, as understood by
            JobGroup.import_from_dict.

        return (dict): the job group after execution, as exported by
            its export_to_dict method.

        raise (JobException): if the worker is already busy or if the
            execution fails.

        """
        job_group = JobGroup.import_from_dict(job_group_dict)

        # Non-blocking attempt: a busy worker declines the request.
        if not self.work_lock.acquire(False):
            # No job has been selected yet at this point, so the
            # request is described by the jobs of the whole group.
            infos = ", ".join("%s" % job.info
                              for job in job_group.jobs.values())
            err_msg = "Request '%s' received, " \
                "but declined because of acquired lock" % infos
            logger.warning(err_msg)
            raise JobException(err_msg)

        try:
            self._ignore_job = False

            for k, job in job_group.jobs.iteritems():
                logger.operation = "job '%s'" % (job.info)
                logger.info("Request received")

                job.shard = self.shard

                # FIXME This is actually kind of a workaround...
                # The only TaskType that needs it is OutputOnly.
                job._key = k

                # FIXME We're creating a new TaskType for each Job
                # even if, at the moment, a JobGroup always uses
                # the same TaskType and the same parameters. Yet,
                # this could change in the future, so the best
                # solution is to keep a cache of TaskTypes objects
                # (like ScoringService does with ScoreTypes, except
                # that we cannot index by Dataset ID here...).
                task_type = get_task_type(job.task_type,
                                          job.task_type_parameters)
                task_type.execute_job(job, self.file_cacher)

                logger.info("Request finished.")

                if not job.success or self._ignore_job:
                    job_group.success = False
                    break
            else:
                # Reached only when no job broke out of the loop.
                job_group.success = True

            return job_group.export_to_dict()

        except:
            # Deliberately catch-all: every failure, of any kind,
            # reaches the caller as a JobException.
            err_msg = "Worker failed on operation `%s'" % logger.operation
            logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
            raise JobException(err_msg)

        finally:
            logger.operation = ""
            self.work_lock.release()
class TestFileCacher(TestService):
    """Service that performs automatically some tests for the
    FileCacher service.

    The test_NNN methods share state through instance attributes
    (content, digest, cache_path, ...): each test relies on what the
    previous ones stored there.

    """

    def __init__(self, shard):
        logger.initialize(ServiceCoord("TestFileCacher", shard))
        TestService.__init__(self, shard, custom_logger=logger)

        # Assume we store the cache in "./cache/fs-cache-TestFileCacher-0/"
        self.cache_base_path = os.path.join(config.cache_dir,
                                            "fs-cache-TestFileCacher-0")
        self.cache_path = None
        self.content = None
        self.fake_content = None
        self.digest = None
        self.file_obj = None
        # Set by test_000 and read back by test_002.
        self.size = None
        self.file_cacher = FileCacher(self)

    def prepare(self):
        """Initialization for the test code - make sure that the cache
        is empty before testing.

        """
        logger.info("Please delete directory %s before." %
                    self.cache_base_path)

    def _object_path(self, digest):
        """Return where the local cache is expected to keep digest."""
        return os.path.join(self.cache_base_path, "objects", digest)

    @staticmethod
    def _read_binary(path):
        """Return the whole content of path, closing the file."""
        with open(path, "rb") as file_:
            return file_.read()

    def _check_stored(self, digest):
        """End the test according to whether self.content was cached.

        On success the digest and the path of the cached copy are
        remembered for the following tests.

        digest (string): the digest returned by put_file.

        """
        path = self._object_path(digest)
        if not os.path.exists(path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(path) != self.content:
            self.test_end(
                False,
                "Local cache's content differ "
                "from original file.")
        else:
            self.cache_path = path
            self.digest = digest
            self.test_end(True, "Data sent and cached without error.")

    def _check_served_from_cache(self, received, success_message):
        """End the test according to whether the fake content, planted
        in the cache, is what FileCacher gave back.

        received (string): the data obtained from FileCacher.
        success_message (string): the message to report on success.

        """
        if received == self.fake_content:
            self.test_end(True, success_message)
        elif received == self.content:
            self.test_end(False,
                          "Did not use the cache even if it could.")
        else:
            self.test_end(False, "Content differ.")

    ### TEST 000 ###

    def test_000(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a file-like object. FC should cache the content
        locally.

        """
        self.size = 100
        self.content = "".join(
            chr(random.randint(0, 255)) for unused_i in xrange(self.size))

        logger.info(" I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file(
                file_obj=StringIO(self.content),
                description="Test #000")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            # Nothing was stored: there is no digest to check.
            return

        self._check_stored(data)

    ### TEST 001 ###

    def test_001(self):
        """Retrieve the file.

        The cached copy is overwritten first, so that getting the
        fake content back proves that the cache was used.

        """
        logger.info(" I am retrieving the ~100B binary file from FileCacher")
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             temp_file_obj=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        try:
            received = data.read()
        finally:
            data.close()

        self._check_served_from_cache(received,
                                      "Data object received correctly.")

    ### TEST 002 ###

    def test_002(self):
        """Check the size of the file.

        """
        logger.info(" I am checking the size of the ~100B binary file")
        try:
            size = self.file_cacher.get_size(self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        if size == self.size:
            self.test_end(True, "The size is correct.")
        else:
            self.test_end(
                False,
                "The size is wrong: %d instead of %d" % (size, self.size))

    ### TEST 003 ###

    def test_003(self):
        """Get file from FileCacher.

        """
        logger.info(" I am retrieving the file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             temp_file_obj=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        try:
            received = data.read()
        finally:
            data.close()

        if received != self.content:
            self.test_end(False, "Content differ.")
        elif not os.path.exists(self.cache_path):
            self.test_end(False, "File not stored in local cache.")
        elif self._read_binary(self.cache_path) != self.content:
            self.test_end(
                False,
                "Local cache's content differ " +
                "from original file.")
        else:
            self.test_end(True, "Content object received " +
                          "and cached correctly.")

    ### TEST 004 ###

    def test_004(self):
        """Delete the file through FS and tries to get it again
        through FC.

        """
        logger.info(" I am deleting the file from FileCacher.")
        try:
            self.file_cacher.delete(digest=self.digest)
        except Exception as error:
            self.test_end(False, "Error received: %s." % error)
            # Already failed: do not report a second verdict.
            return

        logger.info(" File deleted correctly.")
        logger.info(" I am getting the file from FileCacher.")
        try:
            self.file_cacher.get_file(digest=self.digest)
        except Exception as error:
            self.test_end(True, "Correctly received an error: %r." % error)
        else:
            self.test_end(False, "Did not receive error.")

    ### TEST 005 ###

    def test_005(self):
        """Get unexisting file from FileCacher.

        """
        logger.info(" I am retrieving an unexisting file from FileCacher.")
        try:
            self.file_cacher.get_file(digest=self.digest,
                                      temp_file_obj=True)
        except Exception as error:
            self.test_end(True, "Correctly received an error: %r." % error)
        else:
            self.test_end(False, "Did not receive error.")

    ### TEST 006 ###

    def test_006(self):
        """Send a ~100B random binary file to the storage through
        FileCacher as a string. FC should cache the content locally.

        """
        self.content = "".join(
            chr(random.randint(0, 255)) for unused_i in xrange(100))

        logger.info(" I am sending the ~100B binary file to FileCacher")
        try:
            data = self.file_cacher.put_file(binary_data=self.content,
                                             description="Test #005")
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        self._check_stored(data)

    ### TEST 007 ###

    def test_007(self):
        """Retrieve the file as a string.

        """
        logger.info(" I am retrieving the ~100B binary file from FileCacher "
                    "using get_file_to_string()")
        self.fake_content = "Fake content.\n"
        with open(self.cache_path, "wb") as cached_file:
            cached_file.write(self.fake_content)
        try:
            data = self.file_cacher.get_file(digest=self.digest,
                                             string=True)
        except Exception as error:
            self.test_end(False, "Error received: %r." % error)
            return

        self._check_served_from_cache(data, "Data received correctly.")

    ### TEST 008 ###

    def test_008(self):
        """Put a ~100MB file into the storage (using a specially
        crafted file-like object).

        """
        logger.info(" I am sending the ~100MB binary file to FileCacher")
        rand_file = RandomFile(100000000)
        try:
            try:
                data = self.file_cacher.put_file(file_obj=rand_file,
                                                 description="Test #007")
            except Exception as error:
                self.test_end(False, "Error received: %r." % error)
                return
            if rand_file.dim != 0:
                self.test_end(False,
                              "The input file wasn't read completely.")
                return
            my_digest = rand_file.digest
        finally:
            rand_file.close()

        if not os.path.exists(self._object_path(data)):
            self.test_end(False, "File not stored in local cache.")
        elif my_digest != data:
            self.test_end(False, "File received with wrong hash.")
        else:
            self.cache_path = self._object_path(data)
            self.digest = data
            self.test_end(True, "Data sent and cached without error.")

    ### TEST 009 ###

    def test_009(self):
        """Get the ~100MB file from FileCacher.

        """
        logger.info(" I am retrieving the ~100MB file from FileCacher " +
                    "after deleting the cache.")
        os.unlink(self.cache_path)
        hash_file = HashingFile()
        try:
            try:
                self.file_cacher.get_file(digest=self.digest,
                                          file_obj=hash_file)
            except Exception as error:
                self.test_end(False, "Error received: %r." % error)
                return
            finally:
                my_digest = hash_file.digest
                hash_file.close()

            if self.digest != my_digest:
                self.test_end(False, "Content differs.")
            elif not os.path.exists(self.cache_path):
                self.test_end(False, "File not stored in local cache.")
            else:
                self.test_end(
                    True,
                    "Content object received " +
                    "and cached correctly.")
        finally:
            # The big file is removed whatever the verdict was.
            self.file_cacher.delete(self.digest)