def __init__(self, file_path, store_title):
    """
    Ctor: prepare the bookkeeping for a single experiment run.

    file_path   -- path of the input file; a leading "~" is expanded.
    store_title -- title of the experiment, or None to have a title of
                   the form "Experiment <digits>" generated.

    The experiment directory path is derived from the lower-cased title
    plus a "_<month>-<day>_<hour>-<minute>" suffix and is made unique if
    something already exists at that path.
    """

    object.__init__(self)

    # State
    self.title = None

    # Time (end_time stays None until it is set elsewhere)
    self.start_time = time.time()
    self.end_time = None

    # Loaded entries
    self.entries = []
    # ClassifierResultGroup objects (name, classifier, result)
    self.classifier_results = []
    # OtherResult objects (file_name lines)
    self.other_result_files = []

    # StorerAndPrinter wrapped around a TimePrinter named "exp"
    self.storer_printer = util.prtr.StorerAndPrinter(
        printer=util.prtr.TimePrinter(name="exp"))

    # Input file: full path and bare file name
    expanded_path = os.path.expanduser(file_path)
    self.file_path = expanded_path
    self.input_file_name = os.path.basename(expanded_path)

    # NOTE(review): exit_on_error presumably terminates the program - confirm.
    if not os.path.lexists(expanded_path):
        util.outp.exit_on_error("Input file not found: %s" % expanded_path)

    # Title: use the given one, or build one from five distinct numbers
    # drawn from 0..14 and concatenated as text.
    if store_title is not None:
        self.title = store_title
    else:
        digits = "".join(map(str, random.sample(range(0, 15), 5)))
        self.title = "Experiment %s" % digits

    # Directory name: sanitised lower-case title + local time stamp
    dir_name = (Dir.remove_disallowed_characters(self.title.lower())
                + time.strftime("_%m-%d_%H-%M"))

    # Never reuse an existing path for this experiment
    target_dir = self.get_experiment_folder(dir_name)
    if os.path.lexists(target_dir):
        target_dir = Dir.uniquify(target_dir)
    self.experiment_dir_path = target_dir
def _sample(file_path, number_of_elements, limit_to):
    """
    Sample <number_of_elements> lines from <file_path> into a new file.

    The result is written next to the input as
    "<file_path>_<number_of_elements>-sample" (made unique via
    Dir.uniquify). When <limit_to> is not None it is forwarded to
    ids_tools.reservoir_sample_limit; otherwise the plain
    ids_tools.reservoir_sample is used.

    Raises IOError if the input file does not exist.
    """

    print("Sampling...")

    sample_path = "%s_%s-sample" % (file_path, number_of_elements)

    if not os.path.lexists(file_path):
        raise IOError("Input file doesn't exist")

    sample_path = Dir.uniquify(sample_path)
    source_lines = Dir.yield_lines(file_path)

    # Pick the sampler depending on whether a limit was requested
    if limit_to is not None:
        sampled_lines = ids_tools.reservoir_sample_limit(
            source_lines, number_of_elements, limit_to)
    else:
        sampled_lines = ids_tools.reservoir_sample(
            source_lines, number_of_elements)

    Dir.write_lines(sample_path, sampled_lines)

    print("Done. Wrote to file:\n%s" % sample_path)
def store_experiment(self):
    """
    Persist everything collected by this experiment in its directory.

    Files written below self.experiment_dir_path:
      used_entries  -- the entries in self.entries (via idse_dao)
      classifiers   -- lines from self.create_classifier_lines()
      result        -- lines from self.create_result_lines()
      <file_name>   -- one file per item in self.other_result_files
      stdout        -- every message collected by self.storer_printer

    Also records self.end_time. Raises IOError when any of the target
    files exists already.
    """

    self.end_time = time.time()

    out = self.storer_printer
    out.prt("Storing experiment results...")

    exp_dir = self.experiment_dir_path
    Dir.ensure_folder_exists(exp_dir)

    # Fixed target files
    (entry_file_path, result_file_path,
     stdout_file_path, classifiers_file_path) = (
        os.path.join(exp_dir, name)
        for name in ("used_entries", "result", "stdout", "classifiers"))

    # One (uniquified) target per additional result file, same order
    # as self.other_result_files so they can be zipped together later.
    other_paths = [
        Dir.uniquify(os.path.join(exp_dir, file_name))
        for file_name, _ in self.other_result_files
    ]

    # Refuse to overwrite anything
    all_paths = [
        entry_file_path, result_file_path,
        stdout_file_path, classifiers_file_path
    ] + other_paths
    if any(os.path.lexists(path) for path in all_paths):
        raise IOError("One of the files exists: %s" % all_paths)

    out.prt("Data verified. Storing utilised entries...")

    # Dump the entries, then analyse the file that was just written
    saved_path = idse_dao.save_entries(entry_file_path, self.entries)
    out.prt("Done. Analysing file...")

    log_file_analysis.analyse(saved_path, to_file=True,
                              output_printer=util.prtr.Storer())
    out.prt("Done. Saving classifiers...")

    # Trained classifiers
    Dir.write_lines(classifiers_file_path, self.create_classifier_lines())
    out.prt("Done. Saving result digest...")

    # Result digest
    Dir.write_lines(result_file_path, self.create_result_lines())

    # Additional result files (no-op when there are none)
    for target_path, (name, lines) in zip(other_paths,
                                          self.other_result_files):
        out.prt("Saving others: %s..." % name)
        Dir.write_lines(target_path, lines)

    out.prt("Done!")
    out.prt("Experiment stored in: %s" % self.experiment_dir_path)

    # Save the stdout (tee replacement); must come last so that all
    # messages printed above are part of the file.
    Dir.write_lines(stdout_file_path, out.get_messages())