def test_read_file_info_for_failure(self):
    """
    Probing a file that was only created with ``common.touch`` must raise
    ``FFMPEGHandleFFProbeError``.

    :return:
    """
    self.setup_class()

    # The throw-away file lives in <project_dir>/tests/tmp.
    unreadable_path = os.path.join(
        self.project_dir, 'tests', 'tmp', 'test_failure.mkv')
    common.touch(unreadable_path)

    import pytest
    with pytest.raises(FFMPEGHandleFFProbeError):
        self.ffmpeg.file_probe(unreadable_path)
def test_read_file_info_for_failure(self):
    """
    Ensure that an exception is raised when ``file_probe`` is pointed at a
    file that was only created with ``common.touch``.

    :return:
    """
    # The throw-away file lives in <project_dir>/tests/tmp.
    unreadable_path = os.path.join(
        self.project_dir, 'tests', 'tmp', 'test_failure.mkv')
    common.touch(unreadable_path)

    import pytest
    expected_error = unffmpeg.exceptions.ffprobe.FFProbeError
    with pytest.raises(expected_error):
        self.ffmpeg.file_probe(unreadable_path)
def run(self):
    """Evolve the population until success or the generation limit.

    Every generation the population is saved to disk and scored with
    ``self.fitness``; the score history is re-pickled to
    ``fitness_scores.pickle`` inside ``self.job_dir``. The loop stops when

    * some score exceeds ``self.fitness_threshold``: the traces of all such
      variants are appended to ``self.success_traces`` and dumped, and the
      ``finished_flag`` file is touched; or
    * ``self.max_gen`` is reached: if the last generation beat
      ``self.seed_fitness`` the trace of the best variant is stored as a
      promising trace.

    Otherwise the population is selected, optionally crossed over, and
    mutated for the next generation. A ``result_flag`` file carrying the
    best score is touched before returning.

    :return: always True
    """
    self.logger.info("Start a gp task with %s" % (self.gp_params))

    score_file_name = os.path.join(self.job_dir, "fitness_scores.pickle")
    self.fitness_scores = {}

    self.popul = self.initial_population()
    self.generation = 1
    # Highest score observed so far. Tracked across generations so the final
    # report still has a value when the run stops at the generation limit
    # without beating the seed (this used to be an unbound local there).
    best_score = None

    while self.generation <= self.max_gen:
        self.logger.info(
            "There're %d variants in population at generation %d."
            % (len(self.popul), self.generation))

        file_paths = self.save_variants_to_files()
        scores = self.fitness(file_paths, self.seed_sha1)
        # Introduce a fake score for testing tracing.
        # scores = [0.1, 0.2] * (self.pop_size/2)

        self.fitness_scores[self.generation] = scores
        # The whole history is rewritten each generation; the context
        # manager guarantees the handle is closed.
        with open(score_file_name, 'wb') as score_file:
            pickle.dump(self.fitness_scores, score_file)

        self.logger.info("Fitness scores: %s" % scores)
        self.logger.info("Sorted fitness: %s" % sorted(scores, reverse=True))

        top_score = max(scores)
        if best_score is None or top_score > best_score:
            best_score = top_score

        if top_score > self.fitness_threshold:
            # Earlier generations never exceeded the threshold (the loop
            # would have stopped), so best_score == top_score here.
            self.logger.info(
                "Already got a high score [%.2f]>%.2f variant, break the GP process."
                % (top_score, self.fitness_threshold))

            # Store the success traces.
            for variant, score in zip(self.popul, scores):
                if score > self.fitness_threshold:
                    self.success_traces.append(variant.active_trace)

            # Dump the new generated traces.
            # We assume no concurrent GP tasks depending on the traces.
            Trace.dump_traces(self.success_traces, self.success_traces_path)
            touch(os.path.join(self.job_dir, finished_flag))
            break
        elif self.generation == self.max_gen:
            # Compare against the instance setting; the bare name `max_gen`
            # only resolved if a module-level global happened to exist.
            self.logger.info("Failed at max generation.")
            if top_score > self.seed_fitness:
                best_gen, best_vid, best_score = self.get_best_variant(
                    1, self.generation)
                promising_trace = self.load_variant_trace(best_gen, best_vid)
                self.logger.info("Save the promising trace %.2f of %d:%d"
                                 % (best_score, best_gen, best_vid))
                self.promising_traces.append(promising_trace)
                Trace.dump_traces(self.promising_traces,
                                  self.promising_traces_path,
                                  exclude_traces=self.success_traces)
            break

        # Crossover
        if self.xover_rate > 0:
            # Floor division keeps the selection size an integer under both
            # classic and true division.
            self.popul = self.select(self.popul, scores, self.pop_size // 2)
            self.logger.debug(
                "After selecting goods and replacing bads, we have %d variants in population."
                % len(self.popul))

            # The slices are copies, so appending children is safe here.
            for p1, p2 in zip(self.popul[0::2], self.popul[1::2]):
                c1, c2 = PdfGenome.crossover(p1, p2)
                self.popul.append(c1)
                self.popul.append(c2)
            self.logger.debug(
                "After crossover, we have %d variants in population."
                % len(self.popul))
        else:  # No Crossover
            self.popul = self.select(self.popul, scores, self.pop_size)
            self.logger.debug(
                "After selecting goods and replacing bads, we have %d variants in population."
                % len(self.popul))

        # Mutation
        next_generation = self.generation + 1
        for i, variant in enumerate(self.popul):
            if i in self.vid_from_trace:
                self.logger.debug("Keep %d:%d variant from trace."
                                  % (next_generation, i))
                continue
            self.logger.debug("Generating %d:%d variant" % (next_generation, i))
            self.popul[i] = PdfGenome.mutation(
                variant, self.mut_rate, self.ext_genome)

        self.generation = next_generation

    if best_score is None:
        # No generation was evaluated (max_gen < 1); the seed's own fitness
        # is the only score available to report.
        best_score = self.seed_fitness

    self.logger.info("Stopped the GP process with max fitness %.2f." % best_score)
    touch(os.path.join(self.job_dir, result_flag % best_score))
    return True
from lib.fitness import fitness_hidost as fitness_func elif classifier_name == "hidost_pdfrate": from lib.fitness import fitness_hidost_pdfrate as fitness_func elif classifier_name == "hidost_pdfrate_mean": from lib.fitness import fitness_hidost_pdfrate_mean as fitness_func elif classifier_name == "hidost_pdfrate_sigmoid": from lib.fitness import fitness_hidost_pdfrate_sigmoid as fitness_func gp_params = {'pop_size': pop_size, 'max_gen': max_gen, \ 'mut_rate': mut_rate, 'xover_rate': xover_rate, \ 'fitness_threshold': stop_fitness} ext_genome = PdfGenome.load_external_genome(ext_genome_folder) try: gp = GPPdf( job_dir=job_dir, seed_sha1=start_hash, seed_file_path=start_file_path, logger=logger, random_state_file_path=random_state_file_path, ext_genome=ext_genome, success_traces_path=success_traces_path, promising_traces_path=promising_traces_path, gp_params=gp_params, fitness_function=fitness_func, ) gp.run() except Exception, e: touch(os.path.join(job_dir, error_flag)) logger.exception(e) sys.exit(1)
def run(self):
    """Evolve the population until success or the generation limit.

    Every generation the population is saved to disk and scored with
    ``self.fitness``; the score history is re-pickled to
    ``fitness_scores.pickle`` inside ``self.job_dir``. The loop stops when

    * some score exceeds ``self.fitness_threshold``: the traces of all such
      variants are appended to ``self.success_traces`` and dumped, and the
      ``finished_flag`` file is touched; or
    * ``self.max_gen`` is reached: if the last generation beat
      ``self.seed_fitness`` the trace of the best variant is stored as a
      promising trace.

    Otherwise the population is selected, optionally crossed over, and
    mutated for the next generation. A ``result_flag`` file carrying the
    best score is touched before returning.

    :return: always True
    """
    self.logger.info("Start a gp task with %s" % (self.gp_params))

    score_file_name = os.path.join(self.job_dir, "fitness_scores.pickle")
    self.fitness_scores = {}

    self.popul = self.initial_population()
    self.generation = 1
    # Highest score observed so far. Tracked across generations so the final
    # report still has a value when the run stops at the generation limit
    # without beating the seed (this used to be an unbound local there).
    best_score = None

    while self.generation <= self.max_gen:
        self.logger.info(
            "There're %d variants in population at generation %d."
            % (len(self.popul), self.generation))

        file_paths = self.save_variants_to_files()
        scores = self.fitness(file_paths, self.seed_sha1)
        # Introduce a fake score for testing tracing.
        # scores = [0.1, 0.2] * (self.pop_size/2)

        self.fitness_scores[self.generation] = scores
        # The whole history is rewritten each generation; the context
        # manager guarantees the handle is closed.
        with open(score_file_name, 'wb') as score_file:
            pickle.dump(self.fitness_scores, score_file)

        self.logger.info("Fitness scores: %s" % scores)
        self.logger.info("Sorted fitness: %s" % sorted(scores, reverse=True))

        top_score = max(scores)
        if best_score is None or top_score > best_score:
            best_score = top_score

        if top_score > self.fitness_threshold:
            # Earlier generations never exceeded the threshold (the loop
            # would have stopped), so best_score == top_score here.
            self.logger.info(
                "Already got a high score [%.2f]>%.2f variant, break the GP process."
                % (top_score, self.fitness_threshold))

            # Store the success traces.
            for variant, score in zip(self.popul, scores):
                if score > self.fitness_threshold:
                    self.success_traces.append(variant.active_trace)

            # Dump the new generated traces.
            # We assume no concurrent GP tasks depending on the traces.
            Trace.dump_traces(self.success_traces, self.success_traces_path)
            touch(os.path.join(self.job_dir, finished_flag))
            break
        elif self.generation == self.max_gen:
            # Compare against the instance setting; the bare name `max_gen`
            # only resolved if a module-level global happened to exist.
            self.logger.info("Failed at max generation.")
            if top_score > self.seed_fitness:
                best_gen, best_vid, best_score = self.get_best_variant(
                    1, self.generation)
                promising_trace = self.load_variant_trace(best_gen, best_vid)
                self.logger.info("Save the promising trace %.2f of %d:%d"
                                 % (best_score, best_gen, best_vid))
                self.promising_traces.append(promising_trace)
                Trace.dump_traces(self.promising_traces,
                                  self.promising_traces_path,
                                  exclude_traces=self.success_traces)
            break

        # Crossover
        if self.xover_rate > 0:
            # Floor division keeps the selection size an integer under both
            # classic and true division.
            self.popul = self.select(self.popul, scores, self.pop_size // 2)
            self.logger.debug(
                "After selecting goods and replacing bads, we have %d variants in population."
                % len(self.popul))

            # The slices are copies, so appending children is safe here.
            for p1, p2 in zip(self.popul[0::2], self.popul[1::2]):
                c1, c2 = PdfGenome.crossover(p1, p2)
                self.popul.append(c1)
                self.popul.append(c2)
            self.logger.debug(
                "After crossover, we have %d variants in population."
                % len(self.popul))
        else:  # No Crossover
            self.popul = self.select(self.popul, scores, self.pop_size)
            self.logger.debug(
                "After selecting goods and replacing bads, we have %d variants in population."
                % len(self.popul))

        # Mutation
        next_generation = self.generation + 1
        for i, variant in enumerate(self.popul):
            if i in self.vid_from_trace:
                self.logger.debug("Keep %d:%d variant from trace."
                                  % (next_generation, i))
                continue
            self.logger.debug("Generating %d:%d variant" % (next_generation, i))
            self.popul[i] = PdfGenome.mutation(
                variant, self.mut_rate, self.ext_genome)

        self.generation = next_generation

    if best_score is None:
        # No generation was evaluated (max_gen < 1); the seed's own fitness
        # is the only score available to report.
        best_score = self.seed_fitness

    self.logger.info("Stopped the GP process with max fitness %.2f." % best_score)
    touch(os.path.join(self.job_dir, result_flag % best_score))
    return True
elif classifier_name == 'hidost': from lib.fitness import fitness_hidost as fitness_func elif classifier_name == "hidost_pdfrate": from lib.fitness import fitness_hidost_pdfrate as fitness_func elif classifier_name == "hidost_pdfrate_mean": from lib.fitness import fitness_hidost_pdfrate_mean as fitness_func elif classifier_name == "hidost_pdfrate_sigmoid": from lib.fitness import fitness_hidost_pdfrate_sigmoid as fitness_func gp_params = {'pop_size': pop_size, 'max_gen': max_gen, \ 'mut_rate': mut_rate, 'xover_rate': xover_rate, \ 'fitness_threshold': stop_fitness} ext_genome = PdfGenome.load_external_genome(ext_genome_folder) try: gp = GPPdf( job_dir = job_dir, seed_sha1 = start_hash, seed_file_path = start_file_path, logger = logger, random_state_file_path = random_state_file_path, ext_genome = ext_genome, success_traces_path = success_traces_path, promising_traces_path = promising_traces_path, gp_params = gp_params, fitness_function = fitness_func, ) gp.run() except Exception, e: touch(os.path.join(job_dir, error_flag)) logger.exception(e) sys.exit(1)
class GPPdf:
    """Genetic-programming driver that evolves variants of a seed PDF.

    The instance keeps the population in ``self.popul``, writes every
    generation below ``<job_dir>/variants`` and records fitness scores and
    traces on disk while ``run`` executes.

    NOTE(review): ``run`` relies on ``initial_population``, ``select``,
    ``get_best_k_variant`` and ``vid_from_trace``, none of which are defined
    in this part of the class -- confirm where they are provided.
    """

    def __init__(
            self,
            job_dir,
            seed_sha1,
            seed_file_path,
            logger,
            random_state_file_path,
            ext_genome,
            success_traces_path,
            promising_traces_path,
            gp_params,
            fitness_function,
    ):
        """Prepare a GP task: random state, seed, traces and parameters.

        :param job_dir: directory receiving ``random_state.pickle``, the
            variants and the score file.
        :param seed_sha1: passed as second argument to the fitness function.
        :param seed_file_path: PDF seed, loaded with
            ``PdfGenome.load_genome`` and scored once up front.
        :param logger: logger used for all progress messages.
        :param random_state_file_path: optional pickle holding a ``random``
            module state to restore; a falsy value skips the restore.
        :param ext_genome: external genome handed to ``PdfGenome.mutation``.
        :param success_traces_path: file read with ``Trace.load_traces`` and
            later written by ``run``.
        :param promising_traces_path: same, for promising traces.
        :param gp_params: dict with the keys ``pop_size``, ``max_gen``,
            ``mut_rate``, ``xover_rate`` and ``fitness_threshold``.
        :param fitness_function: callable invoked as
            ``fitness_function(file_paths, seed_sha1)``; its result is
            indexed and passed to ``max``.
        """
        self.logger = logger
        self.job_dir = job_dir
        self.seed_sha1 = seed_sha1

        # Load the pre-defined random state for reproducing the existing results.
        if random_state_file_path:
            try:
                # NOTE: unpickling executes code embedded in the file; only
                # point this at state files from a trusted source.
                with open(random_state_file_path, 'rb') as state_file:
                    random_state = pickle.load(state_file)
                random.setstate(random_state)
                logger.debug("Loaded a random state from %s" % random_state_file_path)
            except Exception:
                # Best effort: a missing or unreadable state file must not
                # abort the task, the current random state is used instead.
                logger.warning("Failed to load random state from %s" % random_state_file_path)

        # Save random state for reproducing results in the future.
        random_state_file = os.path.join(self.job_dir, "random_state.pickle")
        with open(random_state_file, 'wb') as state_file:
            pickle.dump(random.getstate(), state_file)

        self.fitness_func = fitness_function

        # Load the seed.
        self.seed_file_path = seed_file_path
        self.seed_fitness = self.fitness([self.seed_file_path], self.seed_sha1)[0]
        self.seed_root = PdfGenome.load_genome(seed_file_path)
        self.logger.info("Loaded %s as PDF seed, fitness %.2f."
                         % (seed_file_path, self.seed_fitness))

        # Load the external genome.
        self.ext_genome = ext_genome

        # Load traces.
        self.success_traces_path = success_traces_path
        self.success_traces = Trace.load_traces(self.success_traces_path)
        self.promising_traces_path = promising_traces_path
        self.promising_traces = Trace.load_traces(self.promising_traces_path)

        # Initiate some parameters.
        self.gp_params = gp_params
        self.pop_size = gp_params['pop_size']
        self.max_gen = gp_params['max_gen']
        self.mut_rate = gp_params['mut_rate']
        self.xover_rate = gp_params['xover_rate']
        self.fitness_threshold = gp_params['fitness_threshold']

    def _variant_path(self, gen, vid):
        """Return the file path of variant ``vid`` in generation ``gen``."""
        relative = "./variants/generation_%d/%d.pdf" % (gen, vid)
        return os.path.join(self.job_dir, relative)

    def save_variants_to_files(self):
        """Write the current population to disk, one PDF per variant.

        :return: list of the written file paths, in population order.
        """
        folder = os.path.join(
            self.job_dir, "./variants/generation_%d" % (self.generation))
        if not os.path.isdir(folder):
            os.makedirs(folder)

        file_paths = []
        for vid, variant in enumerate(self.popul):
            path = self._variant_path(self.generation, vid)
            file_paths.append(path)
            PdfGenome.save_to_file(variant, path)
        return file_paths

    def load_variant(self, gen, vid):
        """Load the genome of variant ``vid`` saved in generation ``gen``."""
        return PdfGenome.load_genome(self._variant_path(gen, vid))

    def load_variant_trace(self, gen, vid):
        """Load the trace of variant ``vid`` saved in generation ``gen``."""
        return PdfGenome.load_trace(self._variant_path(gen, vid))

    def fitness(self, *args):
        """Forward ``args`` to the configured fitness function."""
        return self.fitness_func(*args)

    def run(self):
        """Evolve the population until success or the generation limit.

        Every generation the population is saved and scored; the score
        history is re-pickled to ``fitness_scores.pickle`` in
        ``self.job_dir``. The loop stops when a score exceeds
        ``self.fitness_threshold`` (success traces are dumped and the
        ``finished_flag`` file is touched) or when ``self.max_gen`` is
        reached (if the last generation is at least as fit as the seed, the
        traces of the best variants are stored as promising traces).
        Otherwise the population is selected, optionally crossed over and
        mutated. The best score ends up in ``self.best_score`` and in the
        name of the touched ``result_flag`` file.

        :return: always True
        """
        self.logger.info("Start a gp task with %s" % (self.gp_params))

        score_file_name = os.path.join(self.job_dir, "fitness_scores.pickle")
        self.fitness_scores = {}

        self.popul = self.initial_population()
        self.generation = 1
        # Highest score observed so far. Tracked across generations so the
        # final report has a value even when the run stops at the generation
        # limit below the seed fitness (the attribute used to be missing).
        self.best_score = None

        while self.generation <= self.max_gen:
            self.logger.info(
                "There're %d variants in population at generation %d."
                % (len(self.popul), self.generation))

            file_paths = self.save_variants_to_files()
            scores = self.fitness(file_paths, self.seed_sha1)
            # Introduce a fake score for testing tracing.
            # scores = [0.1, 0.2] * (self.pop_size/2)

            self.fitness_scores[self.generation] = scores
            # The whole history is rewritten each generation; the context
            # manager guarantees the handle is closed.
            with open(score_file_name, 'wb') as score_file:
                pickle.dump(self.fitness_scores, score_file)

            self.logger.info("Fitness scores: %s" % scores)
            self.logger.info("Sorted fitness: %s" % sorted(scores, reverse=True))

            top_score = max(scores)
            if self.best_score is None or top_score > self.best_score:
                self.best_score = top_score

            if top_score > self.fitness_threshold:
                # Earlier generations never exceeded the threshold, so
                # self.best_score == top_score at this point.
                self.logger.info(
                    "Already got a high score [%.2f]>%.2f variant, break the GP process."
                    % (top_score, self.fitness_threshold))

                # Store the success traces.
                for variant, score in zip(self.popul, scores):
                    if score > self.fitness_threshold:
                        self.success_traces.append(variant.active_trace)

                # Dump the new generated traces.
                # We assume no concurrent GP tasks depending on the traces.
                Trace.dump_traces(self.success_traces, self.success_traces_path)
                touch(os.path.join(self.job_dir, finished_flag))
                break
            elif self.generation == self.max_gen:
                # Compare against the instance setting; the bare name
                # `max_gen` only resolved if a module global existed.
                self.logger.info("Failed at max generation.")
                if top_score >= self.seed_fitness:
                    # k can be a parameter
                    best_k_gen, best_k_vid, best_k_scores = \
                        self.get_best_k_variant(4, 1, self.generation)
                    self.best_score = best_k_scores[0]
                    for best_gen, best_vid, this_score in zip(
                            best_k_gen, best_k_vid, best_k_scores):
                        promising_trace = self.load_variant_trace(
                            best_gen, best_vid)
                        self.logger.info(
                            "Save the promising trace %.2f of %d:%d"
                            % (this_score, best_gen, best_vid))
                        if promising_trace not in self.promising_traces:
                            self.promising_traces.append(promising_trace)
                    Trace.dump_traces(self.promising_traces,
                                      self.promising_traces_path,
                                      exclude_traces=self.success_traces)
                break

            # Crossover
            if self.xover_rate > 0:
                # Floor division keeps the selection size an integer under
                # both classic and true division.
                self.popul = self.select(self.popul, scores, self.pop_size // 2)
                self.logger.debug(
                    "After selecting goods and replacing bads, we have %d variants in population."
                    % len(self.popul))

                # The slices are copies, so appending children is safe here.
                for p1, p2 in zip(self.popul[0::2], self.popul[1::2]):
                    c1, c2 = PdfGenome.crossover(p1, p2)
                    self.popul.append(c1)
                    self.popul.append(c2)
                self.logger.debug(
                    "After crossover, we have %d variants in population."
                    % len(self.popul))
            else:  # No Crossover
                self.popul = self.select(self.popul, scores, self.pop_size)
                self.logger.debug(
                    "After selecting goods and replacing bads, we have %d variants in population."
                    % len(self.popul))

            # Mutation
            next_generation = self.generation + 1
            for i, variant in enumerate(self.popul):
                if i in self.vid_from_trace:
                    self.logger.debug("Keep %d:%d variant from trace."
                                      % (next_generation, i))
                    continue
                self.logger.debug("Generating %d:%d variant"
                                  % (next_generation, i))
                try:
                    self.popul[i] = PdfGenome.mutation(
                        variant, self.mut_rate, self.ext_genome)
                except Exception as e:
                    # A failed mutation falls back to a fresh copy of the
                    # seed so the population size stays constant.
                    self.logger.debug(
                        "Exception %s, replace with original seed" % e)
                    self.popul[i] = deepcopy(self.seed_root)

            self.generation = next_generation

        if self.best_score is None:
            # No generation was evaluated (max_gen < 1); the seed's own
            # fitness is the only score available to report.
            self.best_score = self.seed_fitness

        self.logger.info("Stopped the GP process with max fitness %.2f."
                         % self.best_score)
        touch(os.path.join(self.job_dir, result_flag % self.best_score))
        return True
class GPPdf:
    """Genetic-programming driver that evolves variants of a seed PDF.

    Besides the population (``self.popul``) the instance builds
    ``self.ext_trie``, a trie over the external genome that is handed to
    ``PdfGenome.mutation``. Every generation is written below
    ``<job_dir>/variants`` and fitness scores and traces are recorded on
    disk while ``run`` executes.

    NOTE(review): ``run`` relies on ``initial_population``, ``select``,
    ``get_best_variant`` and ``vid_from_trace``, none of which are defined
    in this part of the class -- confirm where they are provided.
    """

    def __init__(
            self,
            job_dir,
            seed_sha1,
            seed_file_path,
            logger,
            random_state_file_path,
            ext_genome,
            success_traces_path,
            promising_traces_path,
            gp_params,
            fitness_function,
    ):
        """Prepare a GP task: random state, seed, trie, traces, parameters.

        :param job_dir: directory receiving ``random_state.pickle``, the
            variants and the score file.
        :param seed_sha1: passed as second argument to the fitness function.
        :param seed_file_path: PDF seed, loaded with
            ``PdfGenome.load_genome`` and scored once up front.
        :param logger: logger used for all progress messages.
        :param random_state_file_path: optional pickle holding a ``random``
            module state to restore; a falsy value skips the restore.
        :param ext_genome: sequence of ``(parent, gene)`` pairs; indexed to
            build the trie and handed to ``PdfGenome.mutation``.
        :param success_traces_path: file read with ``Trace.load_traces`` and
            later written by ``run``.
        :param promising_traces_path: same, for promising traces.
        :param gp_params: dict with the keys ``pop_size``, ``max_gen``,
            ``mut_rate``, ``xover_rate`` and ``fitness_threshold``.
        :param fitness_function: callable invoked as
            ``fitness_function(file_paths, seed_sha1)``; its result is
            indexed and passed to ``max``.
        """
        self.logger = logger
        self.job_dir = job_dir
        self.seed_sha1 = seed_sha1

        # Load the pre-defined random state for reproducing the existing results.
        if random_state_file_path:
            try:
                # NOTE: unpickling executes code embedded in the file; only
                # point this at state files from a trusted source.
                with open(random_state_file_path, 'rb') as state_file:
                    random_state = pickle.load(state_file)
                random.setstate(random_state)
                logger.debug("Loaded a random state from %s" % random_state_file_path)
            except Exception:
                # Best effort: a missing or unreadable state file must not
                # abort the task, the current random state is used instead.
                logger.warning("Failed to load random state from %s" % random_state_file_path)

        # Save random state for reproducing results in the future.
        random_state_file = os.path.join(self.job_dir, "random_state.pickle")
        with open(random_state_file, 'wb') as state_file:
            pickle.dump(random.getstate(), state_file)

        self.fitness_func = fitness_function

        # Load the seed.
        self.seed_file_path = seed_file_path
        self.seed_fitness = self.fitness([self.seed_file_path], self.seed_sha1)[0]
        self.seed_root = PdfGenome.load_genome(seed_file_path)
        self.logger.info("Loaded %s as PDF seed, fitness %.2f."
                         % (seed_file_path, self.seed_fitness))

        # Load the external genome.
        self.ext_genome = ext_genome

        # initialize the ext_genome trie
        # it's possible that there is the path, but no value, because of the trie
        # the root is root, next = '', next is 'Root'...
        self.ext_trie = pygtrie.StringTrie(separator=os.path.sep)
        for ext_id, (_parent, gene) in enumerate(self.ext_genome):
            # The key is the gene path with its integer items left out.
            key = ''.join([item for item in gene if type(item) != int])
            try:
                self.ext_trie[key].append(ext_id)
            except KeyError:
                self.ext_trie[key] = [ext_id]

        # populate the ext_ids by recursion.
        # traverse every node..
        # NOTE(review): this walks pygtrie's private node structure
        # (`_root`, `children`, `value`, `iteritems`) and presumably treats a
        # bare `object()` value as "no value stored" -- confirm against the
        # installed pygtrie version before changing any of it.
        queue = []
        for child in self.ext_trie._root.children.iteritems():
            queue.append(child)
        while len(queue) > 0:
            key, node = queue.pop(0)
            if type(node.value) == object:
                node.value = self.descendent_value(node)
            else:
                node.value = list(set(node.value + self.descendent_value(node)))
            # put children back into the queue
            for subkey, obj in node.children.iteritems():
                queue.append((subkey, obj))

        # Load traces.
        self.success_traces_path = success_traces_path
        self.success_traces = Trace.load_traces(self.success_traces_path)
        self.promising_traces_path = promising_traces_path
        self.promising_traces = Trace.load_traces(self.promising_traces_path)

        # Initiate some parameters.
        self.gp_params = gp_params
        self.pop_size = gp_params['pop_size']
        self.max_gen = gp_params['max_gen']
        self.mut_rate = gp_params['mut_rate']
        self.xover_rate = gp_params['xover_rate']
        self.fitness_threshold = gp_params['fitness_threshold']

    def descendent_value(self, node):
        """Return the value of ``node`` or the merged values below it.

        A node whose value is not a bare ``object`` instance, or that has no
        children, yields its own value unchanged. Otherwise the values
        obtained recursively from the children are concatenated and
        de-duplicated (the resulting order is unspecified).

        :param node: trie node exposing ``value`` and ``children``.
        :return: ``node.value`` or a list of unique collected values.
        """
        # already visited
        if type(node.value) != object:
            return node.value
        # leaf node, there is no more children
        elif node.children == {}:
            return node.value
        else:
            # recurse
            res = []
            for _key, childnode in node.children.iteritems():
                res += self.descendent_value(childnode)
            res = list(set(res))
            return res

    def _variant_path(self, gen, vid):
        """Return the file path of variant ``vid`` in generation ``gen``."""
        relative = "./variants/generation_%d/%d.pdf" % (gen, vid)
        return os.path.join(self.job_dir, relative)

    def save_variants_to_files(self):
        """Write the current population to disk, one PDF per variant.

        :return: list of the written file paths, in population order.
        """
        folder = os.path.join(
            self.job_dir, "./variants/generation_%d" % (self.generation))
        if not os.path.isdir(folder):
            os.makedirs(folder)

        file_paths = []
        for vid, variant in enumerate(self.popul):
            path = self._variant_path(self.generation, vid)
            file_paths.append(path)
            PdfGenome.save_to_file(variant, path)
        return file_paths

    def load_variant(self, gen, vid):
        """Load the genome of variant ``vid`` saved in generation ``gen``."""
        return PdfGenome.load_genome(self._variant_path(gen, vid))

    def load_variant_trace(self, gen, vid):
        """Load the trace of variant ``vid`` saved in generation ``gen``."""
        return PdfGenome.load_trace(self._variant_path(gen, vid))

    def fitness(self, *args):
        """Forward ``args`` to the configured fitness function."""
        return self.fitness_func(*args)

    def run(self):
        """Evolve the population until success or the generation limit.

        Every generation the population is saved and scored; the score
        history is re-pickled to ``fitness_scores.pickle`` in
        ``self.job_dir``. The loop stops when a score exceeds
        ``self.fitness_threshold`` (success traces are dumped and the
        ``finished_flag`` file is touched) or when ``self.max_gen`` is
        reached (if the last generation is at least as fit as the seed, the
        trace of the best variant is stored as a promising trace).
        Otherwise the population is selected, optionally crossed over and
        mutated. The best score ends up in ``self.best_score`` and in the
        name of the touched ``result_flag`` file.

        :return: always True
        """
        self.logger.info("Start a gp task with %s" % (self.gp_params))

        score_file_name = os.path.join(self.job_dir, "fitness_scores.pickle")
        self.fitness_scores = {}

        self.popul = self.initial_population()
        self.generation = 1
        # Highest score observed so far. Tracked across generations so the
        # final report has a value even when the run stops at the generation
        # limit below the seed fitness (the attribute used to be missing).
        self.best_score = None

        while self.generation <= self.max_gen:
            self.logger.info(
                "There're %d variants in population at generation %d."
                % (len(self.popul), self.generation))

            file_paths = self.save_variants_to_files()
            scores = self.fitness(file_paths, self.seed_sha1)
            # Introduce a fake score for testing tracing.
            # scores = [0.1, 0.2] * (self.pop_size/2)

            self.fitness_scores[self.generation] = scores
            # The whole history is rewritten each generation; the context
            # manager guarantees the handle is closed.
            with open(score_file_name, 'wb') as score_file:
                pickle.dump(self.fitness_scores, score_file)

            self.logger.info("Fitness scores: %s" % scores)
            self.logger.info("Sorted fitness: %s" % sorted(scores, reverse=True))

            top_score = max(scores)
            if self.best_score is None or top_score > self.best_score:
                self.best_score = top_score

            if top_score > self.fitness_threshold:
                # Earlier generations never exceeded the threshold, so
                # self.best_score == top_score at this point.
                self.logger.info(
                    "Already got a high score [%.2f]>%.2f variant, break the GP process."
                    % (top_score, self.fitness_threshold))

                # Store the success traces.
                for variant, score in zip(self.popul, scores):
                    if score > self.fitness_threshold:
                        self.success_traces.append(variant.active_trace)

                # Dump the new generated traces.
                # We assume no concurrent GP tasks depending on the traces.
                Trace.dump_traces(self.success_traces, self.success_traces_path)
                touch(os.path.join(self.job_dir, finished_flag))
                break
            elif self.generation == self.max_gen:
                # Compare against the instance setting; the bare name
                # `max_gen` only resolved if a module global existed.
                self.logger.info("Failed at max generation.")
                if top_score >= self.seed_fitness:
                    best_gen, best_vid, self.best_score = \
                        self.get_best_variant(1, self.generation)
                    promising_trace = self.load_variant_trace(
                        best_gen, best_vid)
                    self.logger.info("Save the promising trace %.2f of %d:%d"
                                     % (self.best_score, best_gen, best_vid))
                    self.promising_traces.append(promising_trace)
                    Trace.dump_traces(self.promising_traces,
                                      self.promising_traces_path,
                                      exclude_traces=self.success_traces)
                break

            # Crossover
            if self.xover_rate > 0:
                # Floor division keeps the selection size an integer under
                # both classic and true division.
                self.popul = self.select(self.popul, scores, self.pop_size // 2)
                self.logger.debug(
                    "After selecting goods and replacing bads, we have %d variants in population."
                    % len(self.popul))

                # The slices are copies, so appending children is safe here.
                for p1, p2 in zip(self.popul[0::2], self.popul[1::2]):
                    c1, c2 = PdfGenome.crossover(p1, p2)
                    self.popul.append(c1)
                    self.popul.append(c2)
                self.logger.debug(
                    "After crossover, we have %d variants in population."
                    % len(self.popul))
            else:  # No Crossover
                self.popul = self.select(self.popul, scores, self.pop_size)
                self.logger.debug(
                    "After selecting goods and replacing bads, we have %d variants in population."
                    % len(self.popul))

            # Mutation
            next_generation = self.generation + 1
            for i, variant in enumerate(self.popul):
                if i in self.vid_from_trace:
                    self.logger.debug("Keep %d:%d variant from trace."
                                      % (next_generation, i))
                    continue
                self.logger.debug("Generating %d:%d variant"
                                  % (next_generation, i))
                try:
                    self.popul[i] = PdfGenome.mutation(
                        self.ext_trie, variant, self.mut_rate,
                        self.ext_genome)
                except Exception as e:
                    # A failed mutation falls back to a fresh copy of the
                    # seed so the population size stays constant.
                    self.logger.debug(
                        "Exception %s, replace with original seed" % e)
                    self.popul[i] = deepcopy(self.seed_root)

            self.generation = next_generation

        if self.best_score is None:
            # No generation was evaluated (max_gen < 1); the seed's own
            # fitness is the only score available to report.
            self.best_score = self.seed_fitness

        self.logger.info("Stopped the GP process with max fitness %.2f."
                         % self.best_score)
        touch(os.path.join(self.job_dir, result_flag % self.best_score))
        return True