def run_mutant(self):
    self.mutation_history = []
    self.mutation_cluster = {}
    self.mutation = Mutation(self.configuration.get_mutation_trace(), self.databank)
    self.mutation_traces = self.make_mutation_traces()

    # run a default trace for compare
    logging.info(" start run default trace")
    self.executor.start()
    self.executor.goto_url()
    initial_state = self.get_initail_state()
    self.run_mutant_script(initial_state)
    self.close()

    # run all mutation traces
    logging.info(' total %d mutation traces ', len(self.mutation_traces))
    for n in xrange(len(self.mutation_traces)):
        logging.info(" start run number %d mutant trace", n)
        self.executor.start()
        self.executor.goto_url()
        initial_state = self.get_initail_state()
        self.run_mutant_script(initial_state, self.mutation_traces[n])
        self.close()

    self.save_mutation_history()
def mutate(self, remainingProteins):
    for i in remainingProteins:
        willItChange = randint(0, 99)
        if willItChange == 0:
            mutation = Mutation(self.proteinString[i])
            self.proteinString = self.proteinString[0:i] + mutation.getProtein() + self.proteinString[i+1:]
            self.mutations.append(mutation)
def next_generation(self, population):
    selection = Selection(deepcopy(population), strategy=SelectionStrategy.TOURNAMENT.value)
    mating = Mating(selection)
    crossover = Crossover(mating)
    for i, individual in enumerate(crossover.mating.selection.population.individuals):
        mutation = Mutation(individual)
        crossover.mating.selection.population.individuals[i] = mutation.mutate()
    print(f'Individuals: {len(self.individuals)}')
    print(f'New generation: {len(crossover.new_generation)}')
    return crossover.mating.selection.population
def createChildren(self, parents):
    roulette = Roulette(parents)
    children = []
    for _ in range(5):
        parent_1 = roulette.select_parent()
        parent_2 = roulette.select_parent()
        new_children = Crossover(parent_1, parent_2).generate_children()
        Mutation(new_children[0]).mutate()
        Mutation(new_children[1]).mutate()
        children.append(new_children[0])
        children.append(new_children[1])
    return children
def load_mutation_from_dict(d):
    mutation_id = d['id']
    ref_counts = int(d['ref_counts'])
    var_counts = int(d['var_counts'])
    mutation = Mutation(mutation_id, ref_counts, var_counts)
    for state_dict in d['states']:
        state = load_state_from_dict(state_dict)
        mutation.add_state(state)
    return mutation
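# A minimal sketch of the dict shape load_mutation_from_dict expects, inferred from the
# keys accessed above ('id', 'ref_counts', 'var_counts', 'states'). The field values are
# illustrative only, and the Mutation / load_state_from_dict helpers are assumed to come
# from this module.
example_dict = {
    'id': 'mut_001',        # hypothetical mutation identifier
    'ref_counts': '42',     # stored as strings, converted with int() above
    'var_counts': '7',
    'states': [],           # each entry would be passed to load_state_from_dict
}
mutation = load_mutation_from_dict(example_dict)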
def next(self):
    if self.__f is None:
        self.__open()
        line, stripped_line = self.__readline()
        self.hdr = {self.rename_cols.get(c, c).upper(): i
                    for i, c in enumerate(stripped_line.split(self.separator))}
        all_cols = set(self.hdr.keys())
        coord_cols = all_cols & self.COLUMNS
        req_gen_headers = (len(set(["CHR", "START"]) & coord_cols) == 2
                           and len(set(["REF", "ALT"]) & coord_cols) >= 1)
        req_prot_headers = (len(set(["TRANSCRIPT", "PROTEIN"]) & coord_cols) >= 1
                            and "AA_POS" in coord_cols
                            and len(set(["AA_REF", "AA_ALT"]) & coord_cols) >= 1)
        if self.coord_type is None:
            # infer type of coordinates from the columns header
            if req_gen_headers:
                self.coord_type = Mutation.GENOMIC
                columns = all_cols & self._G_COLS
            elif req_prot_headers:
                self.coord_type = Mutation.PROTEIN
                columns = all_cols & self._P_COLS
            else:
                raise Exception("Not possible to infer which kind of coordinates to use from the headers")
        elif self.coord_type == Mutation.GENOMIC:
            columns = all_cols & self._G_COLS
            if not req_gen_headers:
                raise Exception("Missing required headers for genomic coordinates: CHR, START and any of REF or ALT")
        elif self.coord_type == Mutation.PROTEIN:
            columns = all_cols & self._P_COLS
            if not req_prot_headers:
                raise Exception("Missing required headers for proteomic coordinates: any of TRANSCRIPT or PROTEIN and AA_POS and any of AA_REF or AA_ALT")
        else:
            raise Exception("Unknown coordinate type: {}".format(self.coord_type))
        self.columns = [(c, self.hdr[c], self._MUT_ATTR[c], self._ATTR_TYPE.get(c)) for c in columns]

    line, stripped_line = self.__readline()
    if len(line) == 0:
        raise StopIteration

    line = stripped_line
    fields = line.split(self.separator)
    num_fields = len(fields)

    m = Mutation()
    m.coord = self.coord_type
    for col, ix, attr, atype in self.columns:
        if ix < num_fields:
            if atype is not None:
                setattr(m, attr, atype(fields[ix]))
            else:
                setattr(m, attr, fields[ix])
    return m
def __init__(self, population_size, sample_genotype, crossover_rate=0.6,
             mutation_rate=0.2, maximize=True):
    self.population_size = population_size
    self.genotype = sample_genotype
    self.crossover_rate = crossover_rate
    self.mutation_rate = mutation_rate
    self.selector = RankSelector(maximize)
    self.crossover = OnePointCrossover()
    self.mutation = Mutation()
    self.generations = []
    self.maximize = maximize
def mutate(vcf_info, opts):
    abs_path = opts[util.ABSOLUTE_PATH_OPTION]
    output = opts[util.OUTPUT_OPTION]
    log_not_found = []

    ################################# RefSeq_human_full.fasta ###################################
    util.print_task(util.TASK_LOAD_PROTEIN_FILE)
    refseq_human = read_protein_file(abs_path)
    util.print_status(util.TASK_SUCCESS)
    ##############################################################################################

    ############################# Transcripts_refseq.fasta reading ##############################
    util.print_task(util.TASK_LOAD_TRANSCRIPT_FILE)
    refseq_transc, nm_np_conversor = read_transcript_file(abs_path)
    util.print_status(util.TASK_SUCCESS)
    ##############################################################################################

    ################################# vcf info file processing ##################################
    mutations = defaultdict(list)
    samples = set()
    curr_sample = 1
    util.print_task(util.TASK_PROCESS_MUTATION)
    with open(vcf_info, "r") as f:
        f.readline()  # skip header line
        for line in f:
            try:
                mutation = Mutation(line, nm_np_conversor, refseq_transc, refseq_human)
            except KeyError:
                log_not_found.append(line.rstrip())
                continue
            samples.add(mutation.sample)
            if mutation.mut_protein_sequence:
                if len(samples) == curr_sample:
                    mutations[mutation.transcript].append(mutation)
                else:
                    generate_report.mutation(mutations[mutations.keys()[0]][0].sample, mutations, output)
                    mutations = defaultdict(list)
                    mutations[mutation.transcript].append(mutation)
                    curr_sample += 1
    generate_report.mutation(mutations[mutations.keys()[0]][0].sample, mutations, output)
    util.print_status(util.TASK_SUCCESS)
def createChildren(self, parents):
    # Create the roulette wheel
    roulette = Roulette(parents)
    children = []
    # Create 10 children (two per iteration)
    for _ in range(5):
        parent_1 = roulette.select_parent()
        parent_2 = roulette.select_parent()
        new_children = Crossover(parent_1, parent_2).generate_children()
        # Mutate the children (respecting the configured mutation proportion)
        Mutation(new_children[0]).mutate()
        Mutation(new_children[1]).mutate()
        children.append(new_children[0])
        children.append(new_children[1])
    return children
def attempt_breeding(self, individual_i, individual_j):
    """
    Attempt to breed the two individuals (several times if necessary).

    If the breeding is successful and the neural networks can be built,
    the two offspring are added to the next generation.

    The breeding process is the following:
        - cross-over between the two lovers
        - mutate the offspring with probability p
        - create the offspring neural networks
    """
    successful_breedings = 0
    breeding_attempts = 0
    while successful_breedings < 2 and breeding_attempts < Settings.MAX_BREEDING_ATTEMPTS:
        try:
            # cross over the two individuals
            offspring_1, offspring_2 = Cross_Over(self.population[individual_i],
                                                  self.population[individual_j]).breed()
        except NoBridgeException as e:
            print(e)
            print("Failed to cross-over the individuals")
            return False
        except Exception as e:
            print("Failed to cross-over the individuals")
            return False

        for offspring in [offspring_1, offspring_2]:
            # apply mutations to the offspring (several if we are stuck in a local optimum)
            for _ in range(self.mutations_per_breeding):
                offspring = Mutation(offspring).mutate()
            mutated_offspring = offspring

            # build and train the crossed-mutated graphs on the spot
            if successful_breedings < 2:
                try:
                    mutated_offspring_fitness = self.build_and_train_network(mutated_offspring)
                    self.next_generation_dna.append(mutated_offspring)
                    self.next_generation_fitness.append(mutated_offspring_fitness)
                    successful_breedings += 1
                except Exception as e:
                    print("Failed to build the NN \n\n" + str(e))

        # count this attempt so the loop respects MAX_BREEDING_ATTEMPTS
        breeding_attempts += 1

    return successful_breedings
def revolution(self, args):
    # config
    if args.crossmode == "one":
        cross_func = Crossover.onePoint
    elif args.crossmode == "two":
        cross_func = Crossover.twoPoints
    else:
        cross_func = Crossover.randomPoints

    # Prepare dataset
    dataset = readDataset(args.dpath)

    # Create individuals & population
    ppl = Population()
    for i in range(args.individuals):
        individual = Individual()
        individual.createGene(dataset, args.gene)
        ppl.addInd(individual)

    # Evolution
    for i in range(args.revolution):
        # Evaluate the individuals in the population
        ppl.calcFitness(self.evaluate)
        if (i % 10) == 0:
            ppl.show()

        # Select parents
        if args.elite:
            parents = Select.Elite(ppl, args.esize, args.mode)
            parents.extend(Select.Tournament(ppl, args.individuals - args.esize, args.tornsize, args.mode))
        else:
            parents = Select.Tournament(ppl, args.individuals, args.tornsize, args.mode)

        # Crossover
        children = cross_func(parents, args.individuals, args.gene, args.crossrate)

        # Mutation
        children = Mutation.mutation(children, args.mutaterate, dataset)

        # Swap children
        ppl.setPpl(children)

    # show result
    ppl.calcFitness(self.evaluate)
    ppl.show()
def load_muts_from_file(opt, mut_fname):
    """
    Load all mutations from a population snapshot.

    Load the entire population of mutations from a population snapshot file.

    Inputs
    ------
    opt: global parameter set, used in initialising each mutation
    mut_fname: path to CSV file containing mutation snapshot (which must have
        been written using save_muts_to_file above)

    Returns
    -------
    2-tuple of dictionaries: (all_muts, mutation_map)
    all_muts: a dictionary of mutations of the form
        {mut_type: list of muts of that type}, passed back to the simulation
        as the master list of mutations.
    mutation_map: a dictionary of mutations of the form {mut_id: Mutation},
        used to reconstruct the relationships between clones and mutations
        when loading a population snapshot from file.
    """
    all_muts = {'b': [], 'n': [], 'd': [], 'r': []}
    mutation_map = {}
    with open(mut_fname) as mut_file:
        mut_reader = csv.DictReader(mut_file)
        for row in mut_reader:
            # initialise new Mutation object
            new_mut = Mutation.init_from_file(opt, all_muts, row)
            # add it to ID map
            mutation_map[new_mut.mut_id] = new_mut
            # add it to main mutation dictionary
            try:
                all_muts[new_mut.mut_type].append(new_mut)
            except KeyError:
                raise KeyError("invalid mutation type (from file): {}".format(new_mut.mut_type))
    return all_muts, mutation_map
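# A minimal usage sketch, assuming `opt` is the global parameter object described in the
# docstring and "muts_snapshot.csv" is a hypothetical snapshot previously written with
# save_muts_to_file; both names are illustrative, not part of this file.
all_muts, mutation_map = load_muts_from_file(opt, "muts_snapshot.csv")
for mut_type, muts in all_muts.items():
    print(mut_type, len(muts))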
def __init__(self, starthgt=11, endhgt=-9, spins=3.5, radius=1, speed=SPD_DEFAULT,
             spawn_rate=1, pair_generator=random_pair):
    self.starthgt = starthgt
    self.endhgt = endhgt
    self.spins = spins
    self.maxhgt = self.starthgt - (math.pi) * self.spins
    self.radius = radius
    self._animas = 0
    self._rungs = []
    self._tba = []
    self.speed = speed
    self.spdmod = 1
    self._last_rung = None
    self._last_ends = []
    self.spawn_rate = spawn_rate
    self.pair_generator = pair_generator
    self.mutation = Mutation()
    self.enter_callback = None
    self.exit_callback = None
    self.in_box = []
def main():
    parser = argparse.ArgumentParser(description='Tweak GA parameters')
    parser.add_argument('-size', '--populationSize', type=int, required=False, default=100,
                        help='Desired population size (int)')
    parser.add_argument('-iter', '--maxIter', type=int, required=False, default=100,
                        help='Max number of iterations allowed (int)')
    parser.add_argument('-n', '--n', type=int, required=False, default=10,
                        help='Desired number of neural_networks in the hidden layer')
    parser.add_argument('-k', '--K', type=float, required=False, default=1.1,
                        help='Chromosome is mutated by adding a number from the '
                             'normal_distribution(0, K) to its weight values')
    parser.add_argument('-err', '--errThreshold', type=float, required=False, default=0.1,
                        help='Algorithm stops the search if it has found a chromosome '
                             'with error less than errThreshold')
    parser.add_argument('-train', '--trainSet', type=str, required=False,
                        default="learningSet/train-set.txt", help='Path to training_set')
    #parser.add_argument('-test', '--testSet', type=str, required=False,
    #                    default="learningSet/test-set.txt", help='Path to test_set')
    args = parser.parse_args()

    ERR_THRESHOLD = args.errThreshold
    VEL_POP = args.populationSize
    MAX_ITER = args.maxIter
    N = args.n
    K = args.K

    train_set = parseLearningSet(args.trainSet)

    ## initialize needed operators
    fitnessOp = Fitness(train_set)
    mutationOp = Mutation(K, N, VEL_POP)

    ## initialize population
    P = Population(N, VEL_POP)

    ## returns best neural_network (individual)
    best_nn = run_GA(P, fitnessOp, mutationOp, VEL_POP, MAX_ITER, ERR_THRESHOLD)

    test_set = []   # parseLearningSet(args.testSet)
    test_dict = {}  # parseLearningDict(args.testSet)
    writeOut(best_nn, test_set, test_dict)
def main():
    parser = argparse.ArgumentParser(
        description="Simple toolkit to test changes in RBP/TF binding affinity "
                    "caused by genetic variants.")
    parser.add_argument(dest='vcf', help='Path to the VCF file')
    parser.add_argument(dest='bed', help='Path to the bed file')
    parser.add_argument(dest='fasta', help='Path to the fasta file.')
    parser.add_argument(
        '--list', action='store_true',
        help='If bed argument represents a list of bed files to process (one per line)')
    parser.add_argument(
        '--chr', action='store_true',
        help='Input files should contain chr string. If not found, a fix is tried.')
    parser.add_argument(
        '--gtf',
        help='gtf file to further take into account intron/exon boundaries. '
             'Canonical transcripts will be retrieved.')
    parser.add_argument(
        '--gtf_is_processed', action='store_true',
        help='If set, \'--gtf\' argument represents the processed '
             'dataframe from an original gtf file.')
    parser.add_argument('-o', '--output', default=os.getcwd(),
                        help='Output directory. Default: current directory')
    parser.add_argument(
        '-p', '--fromPickle',
        help='If given, analysis should start from a previously serialized object. '
             'Input file will be ignored.')
    args = parser.parse_args()

    osutils = OSutils()
    is_pickle = False
    if args.fromPickle:
        osutils.is_pickled(args.fromPickle)
        with open(args.fromPickle, 'rb') as file_object:
            raw_data = file_object.read()
        deserialized = pickle.loads(raw_data)
        is_pickle = True
    else:
        Logger.print_advances('Validating input data')
        if args.gtf:
            gtf = GTF(args.gtf, args.gtf_is_processed, args.output)
        else:
            gtf = None

        mutation = Mutation(args.vcf, args.bed, args.fasta, gtf,
                            args.list, args.chr, args.output)
        deserialized = {}
        Logger.print_advances('Starting analysis')
        for name, bedobj in mutation.beds.items():
            Logger.print_advances("Processing {} peak file.".format(name))
            Logger.log("Intersecting variants")
            fn = mutation.vcf_intersect(mutation.vcf_bed, bedobj, name)

            Logger.log('Extracting peaks fasta sequences')
            bed_seq = mutation.get_peak_sequence(bedobj, mutation.fasta)
            bed_peak_fasta = mutation.save_fasta_sequence(
                bed_seq, osutils.set_out_fn(mutation.outdir, name + ".fasta"))

            Logger.log("Mutating fasta sequences")
            isec = Isec(fn)
            seqs_mut = mutation.mutate_fasta(bed_peak_fasta, isec)
            deserialized[name] = seqs_mut
            Logger.log("Done")

        Logger.log("Dumping data structure to {}".format("data.pickle"))
        serialized = pickle.dumps(deserialized)
        with open(osutils.set_out_fn(mutation.outdir, "data.pickle"), 'wb') as file_object:
            file_object.write(serialized)

    motdisrupt = PeaksMutated(deserialized, is_pickle, args.output)
    motdisrupt.list_beds()
    motdisrupt.write_object()
    pwm = PWMs()
    pwm.parse_cisBP_pwm()
creator.create("FitnessMax", base.Fitness, weights=(-1.0,))
creator.create("Individual", Individual, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Structure initializers
toolbox.register("individual", initIndividual, creator.Individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# use multiple processors
pool = multiprocessing.Pool(5)
toolbox.register("map", pool.map)

# register operators
fit = Fitness("data/" + trainset_name)
mut = Mutation()
cross = Crossover()

toolbox.register("evaluate", fit.evaluate)
toolbox.register("mate", cross.cxOnePoint)
toolbox.register("mutate", mut.mutate)
toolbox.register("select", tools.selTournament, tournsize=3)


def main(id, checkpoint_name=None):
    # random.seed(64)
    if checkpoint_name:
        # A file name has been given, then load the data from the file
        cp = pickle.load(open(checkpoint_name, "rb"))
        pop = cp["population"]
        start_gen = cp["generation"] + 1
print("roullette") roulet = Roulette_wheel(variable_config) roulette = roulet.roulette(variable_config, func_solution, popul) selection = roulette #print(roulette) ''' #KRZYŻOWANIE #print("KRZYŻOWANIE") crosov = CrossingOver(variable_config) crosingover = crosov.crosingOver(variable_config, selection) #print(crosingover) #MUTACJA #print("MUTACJA") mutat = Mutation(variable_config) mutation = mutat.mutation(variable_config, crosingover) #print(mutation) #INWERSJA #print("INWERSJA") inver = Inversion(variable_config) inversion = inver.inversion(variable_config, mutation) #print(inversion) offspring = np.asarray(inversion) #print(offspring) decision_variables2 = randomBinSol.decVariables(variable_config, offspring) #print(decision_variables2)
# Structure initializers
toolbox.register("individual", initIndividual, creator.Individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# use multiple processors or GPU
if config.global_config["device"]["device_type"] == "CPU":
    n_cpus = config.global_config["device"]["n_cpus"]
    pool = multiprocessing.Pool(n_cpus)
    toolbox.register("map", pool.map)
    logging.info(f"Running on {n_cpus} CPUs")
else:
    logging.info("Running on GPU.")

# register operators
fit = Fitness(**config.global_config["dataset"])
mut = MutationConv() if use_conv_layers else Mutation()
cross = CrossoverConv() if use_conv_layers else Crossover()

toolbox.register("eval_batch", fit.evaluate_batch)
toolbox.register("evaluate", fit.evaluate)
toolbox.register("mate", cross.cxOnePoint)
toolbox.register("mutate", mut.mutate)

if nsga_number == 3:
    ref_points = tools.uniform_reference_points(2, 12)
    toolbox.register("select", tools.selNSGA3, ref_points=ref_points)
elif nsga_number == 2:
    # NSGA-II - deap implementation
    toolbox.register("select", tools.selNSGA2)
elif nsga_number == 1:
    # Stepan's version of NSGA
    toolbox.register("select", selectNSGA)
class Helix:
    def __init__(self, starthgt=11, endhgt=-9, spins=3.5, radius=1, speed=SPD_DEFAULT,
                 spawn_rate=1, pair_generator=random_pair):
        self.starthgt = starthgt
        self.endhgt = endhgt
        self.spins = spins
        self.maxhgt = self.starthgt - (math.pi) * self.spins
        self.radius = radius
        self._animas = 0
        self._rungs = []
        self._tba = []
        self.speed = speed
        self.spdmod = 1
        self._last_rung = None
        self._last_ends = []
        self.spawn_rate = spawn_rate
        self.pair_generator = pair_generator
        self.mutation = Mutation()
        self.enter_callback = None
        self.exit_callback = None
        self.in_box = []

    def mutate(self, amt):
        self.mutation.mutate(amt)
        self.spdmod += amt * SPD_MUT_RATIO

    ### DO NOT USE
    def make_rung(self, next=None):
        rung = Rung(self, self.pair_generator())
        if next:
            rung.next = next
            self._last_rung.prev = rung
        self._rungs.append(rung)
        self._last_ends = rung
        self._last_rung = rung

    def obscure(self):
        map(Rung.obscure, self._rungs)

    def unobscure(self):
        map(Rung.unobscure, self._rungs)

    def register_callbacks(self, enter_callback, exit_callback):
        self.enter_callback = enter_callback
        self.exit_callback = exit_callback

    def update(self, dt):
        ## update the mutation
        self.mutation.update(dt)
        ## check to see if the last rung is far enough away for a new one
        if self._last_rung:
            if self._last_rung._hgt <= self.starthgt - self.spawn_rate:
                self.make_rung(self._last_rung)
        else:
            self.make_rung()
        ## compute the speed with mod -1.12 -> -1.3
        speed = self.speed * self.spdmod
        #print "speed", speed

        ## update the mutation
        self.mutation.update(dt)
        ## check to see if the last rung is far enough away for a new one
        if self._last_rung:
            if self._last_rung._hgt <= self.starthgt - self.spawn_rate:
                self.make_rung(self._last_rung)
        else:
            self.make_rung()
        self.spdmod = max(self.spdmod - dt * SPD_DECAY, 1)
        ## compute the speed with mod -1.12 -> -1.3
        speed = self.speed * self.spdmod

        for r in self._rungs:
            if r._hgt < -1.12 and r._hgt > -1.3:
                if not r.in_box:
                    r.in_box = True
                    if self.enter_callback:
                        self.enter_callback(r)
            if r.in_box and r._hgt < -1.3:
                r.in_box = False
                if self.exit_callback:
                    self.exit_callback(r)
            r.update(dt, speed)

        ## update and kill the rungs that have it coming :P
        dead = []
        for entity in self._rungs:
            if entity.dead:
                dead.append(entity)
        for e in dead:
            self._rungs.remove(e)
# Select parents for offspring generation
for ch in range(len(scored_population)):
    # perform parent selection from scored population
    selection = Selection(population=scored_population)
    parents = selection.select()

    # perform crossover based on 50% probability
    crossover_prob = random.choice([True, False])
    crossover = Crossover(parents, crossover_probability=crossover_prob)
    offspring = crossover.perform_crossover()

    # perform mutation based on 50% probability
    mutation_prob = random.choice([True, False])
    mutation = Mutation(offspring, mutation_probability=mutation_prob)
    final_offspring = mutation.mutate()

    # add offspring to next generation
    next_gen_population.append(final_offspring)

# Score next gen population
scored_next_gen_population = []
for chromosome in next_gen_population:
    fitness = Fitness(chromosome=chromosome, fitness_goal=fitness_goal)
    scored_next_gen_chromosome = fitness.calculate_fitness()
    scored_next_gen_population.append(scored_next_gen_chromosome)

# Get generation best
scored_next_gen_population.sort(key=lambda x: x[1])
last_index = len(scored_next_gen_population) - 1
class SeleniumCrawler(Crawler):
    def __init__(self, configuration, executor, automata, databank, algorithm):
        self.configuration = configuration
        self.executor = executor
        self.automata = automata
        self.databank = databank
        #ALGO
        self.algorithm = algorithm
        self.algorithm.set_utility(self, configuration, executor, automata)
        #list of event:(state, clickable, inputs, selects, iframe_list)
        self.event_history = []

    def run(self):
        #start time
        self.time_start = time.time()
        self.executor.start()
        self.executor.goto_url()
        initial_state = self.get_initail_state()
        self.run_script_before_crawl(initial_state)
        self.crawl(1)
        return self.automata

    #the core
    def run_algorithm(self):
        # repeat for trace_amount times
        for i in range(self.configuration.get_trace_amount()):
            print('=initial state')
            self.initial()
            print('=get into loop')
            j = 0
            while self.action_events and self.configuration.get_max_depth() != 0:
                print('==start an action event')
                #string = ''.join([ str(action['action']['clickable'].get_id())+str(action['depth'])+str(action['state'].get_id()) for action in self.action_events ])
                string = ''.join([str(action['depth']) + str(action['state'].get_id()) for action in self.action_events])
                logging.info(' action_events : ' + string)
                #print(' action_events : '+string)
                print('==get next action')
                state, action, depth = self.get_next_action()
                print('==change state')
                self.change_state(state, action, depth)
                print('==change edge')
                edge = self.trigger_action(state, action, depth)
                print('==update state')
                new_state, new_depth = self.update_states(state, edge, action, depth)
                print('==end an action event')
                print("new depth:", new_depth)
                print("max depth:", self.configuration.get_max_depth())
                print("new state:", new_state.get_id(), j)
                print("max state:", self.configuration.get_max_states())
                #check depth
                if new_depth > self.configuration.get_max_depth():
                    print("reach max depth")
                #check state
                if int(new_state.get_id()) >= self.configuration.get_max_states():
                    print("reach max state")
                    break
                #check time
                if (time.time() - self.time_start) > self.configuration.get_max_time():
                    logging.info("|||| TIME OUT |||| end crawl ")
                    break
                j = j + 1
            print('=end a for_loop')
        self.close()
        return self.automata

    def initial(self):
        self.action_events = []
        #start time
        self.time_start = time.time()
        print('==prepare algorithm')
        self.algorithm.prepare()
        print('==get current state && add new events')
        current_state = self.automata.get_current_state()
        self.add_new_events(current_state, None, 0)

    def close(self):
        self.algorithm.end()
        self.executor.close()
        print("close browser")

    def get_next_action(self):
        event = self.algorithm.get_next_action(self.action_events)
        return event['state'], event['action'], event['depth']

    def change_state(self, state, action, depth):
        current_state = self.automata.get_current_state()
        if current_state != state:
            self.algorithm.change_state(state, action, depth)
            logging.info(' now depth(%s) - max_depth(%s); current state: %s',
                         depth, self.configuration.get_max_depth(), state.get_id())

    def trigger_action(self, state, action, depth):
        inputs = state.get_copy_inputs(action['iframe_key'])
        selects = state.get_copy_selects(action['iframe_key'])
        checkboxes = state.get_copy_checkboxes(action['iframe_key'])
        radios = state.get_copy_radios(action['iframe_key'])
        new_edge = Edge(state.get_id(), None, action['clickable'], inputs, selects,
                        checkboxes, radios, action['iframe_key'])
        self.algorithm.trigger_action(state, new_edge, action, depth)
        return new_edge  #have to run two automata in p2b2?
    def update_states(self, current_state, new_edge, action, depth):
        dom_list, url, is_same = self.is_same_state_dom(current_state)
        if is_same:
            self.algorithm.update_with_same_state(current_state, new_edge, action, depth, dom_list, url)
            return current_state, depth
        if self.is_same_domain(url):
            logging.info(' |depth:%s state:%s| change dom to: %s', depth, current_state.get_id(), self.executor.get_url())
            # check if this is a new state
            temp_state = State(dom_list, url)
            new_state, is_newly_added = self.automata.add_state(temp_state)
            self.automata.add_edge(new_edge, new_state.get_id())
            # save this click edge
            current_state.add_clickable(action['clickable'], action['iframe_key'])
            self.automata.change_state(new_state)
            # depth GO ON
            depth += 1
            self.event_history.append(new_edge)
            if is_newly_added:
                self.algorithm.update_with_new_state(current_state, new_state, new_edge, action, depth, dom_list, url)
                return new_state, depth
            else:
                self.algorithm.update_with_old_state(current_state, new_state, new_edge, action, depth, dom_list, url)
                return new_state, depth
        else:
            self.algorithm.update_with_out_of_domain(current_state, new_edge, action, depth, dom_list, url)
            return current_state, depth

    def add_new_events(self, state, prev_state, depth):
        self.algorithm.add_new_events(state, prev_state, depth)

    #=========================================================================================
    # BASIC CRAWL
    #=========================================================================================
    def get_initail_state(self):
        logging.info(' get initial state')
        dom_list, url = self.executor.get_dom_list(self.configuration)
        initial_state = State(dom_list, url)
        is_new, state = self.automata.set_initial_state(initial_state)
        if is_new:
            logging.info(' is new, save state')
            self.automata.save_state(initial_state, 0)
            self.automata.save_state_shot(self.executor, initial_state)
            log_list = self.automata.save_log(self.executor, initial_state)
            coor_list = self.automata.save_coor(self.executor, initial_state)
        else:
            self.automata.change_state(state)
        time.sleep(self.configuration.get_sleep_time())
        print('return state')
        return state

    def run_script_before_crawl(self, prev_state):
        for edge in self.configuration.get_before_script():
            #self.click_event_by_edge(edge)
            self.executor.click_event_by_edge(edge)
            self.event_history.append(edge)
            dom_list, url, is_same = self.is_same_state_dom(prev_state)
            if is_same:
                continue
            logging.info(' change dom to: %s', self.executor.get_url())
            # check if this is a new state
            temp_state = State(dom_list, url)
            new_state, is_newly_added = self.automata.add_state(temp_state)
            self.automata.add_edge(edge, new_state.get_id())
            # save this click edge
            prev_state.add_clickable(edge.get_clickable(), edge.get_iframe_list())
            if is_newly_added:
                logging.info(' add new state %s of: %s', new_state.get_id(), url)
                self.automata.save_state(new_state, 0)
                self.automata.save_state_shot(self.executor, new_state)
            self.automata.change_state(new_state)
            prev_state = new_state

    #=============================================================================================
    # BACKTRACK
    #=============================================================================================
    def backtrack(self, state):
        # check if depth over max depth , time over max time
        if (time.time() - self.time_start) > self.configuration.get_max_time():
            logging.info("|||| TIME OUT |||| end backtrack ")
            return
        #if urls are same, guess they are just javascript edges
        if self.executor.get_url() == state.get_url():
            #first, just refresh for javascript button
            logging.info('==<BACKTRACK> : try refresh')
            self.executor.refresh()
            dom_list, url, is_same = self.is_same_state_dom(state)
            if is_same:
                return True
            #if can't, try go back from history
            logging.info('==<BACKTRACK> : try back_history ')
            self.executor.back_history()
            dom_list, url, is_same = self.is_same_state_dom(state)
            if is_same:
                return True
            #if can't, try do last edge of state history
            if self.event_history:
                logging.info('==<BACKTRACK> : try last edge of state history')
                self.executor.forward_history()
                #self.click_event_by_edge( self.event_history[-1] )
                self.executor.click_event_by_edge(self.event_history[-1])
                dom_list, url, is_same = self.is_same_state_dom(state)
                if is_same:
                    return True
        #if can't, try go through all edges
        logging.info('==<BACKTRACK> : start from base url')
        self.executor.goto_url()
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True
        edges = self.automata.get_shortest_path(state)
        for edge in edges:
            self.executor.click_event_by_edge(edge)
            dom_list, url, is_same = self.is_same_state_dom(state)
            if is_same:
                return True
        #if can't, restart and try go again
        logging.info('==<BACKTRACK> : restart driver')
        edges = self.automata.get_shortest_path(state)
        self.executor.restart_app()
        self.executor.goto_url()
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True
        for edge in edges:
            self.executor.click_event_by_edge(edge)
            #check again if executor really turned back. if not, sth error, stop
            state_to = self.automata.get_state_by_id(edge.get_state_to())
            dom_list, url, is_same = self.is_same_state_dom(state_to)
            if not is_same:
                try:
                    err = State(dom_list, url)
                    with open('debug/debug_origin_' + state_to.get_id() + '.txt', 'w') as f:
                        f.write(state_to.get_all_dom(self.configuration))
                    with open('debug/debug_restart_' + state_to.get_id() + '.txt', 'w') as f:
                        f.write(err.get_all_dom(self.configuration))
                    with open('debug/debug_origin_nor_' + state_to.get_id() + '.txt', 'w') as f:
                        f.write(state_to.get_all_normalize_dom(self.configuration))
                    with open('debug/debug_restart_nor_' + state_to.get_id() + '.txt', 'w') as f:
                        f.write(err.get_all_normalize_dom(self.configuration))
                    logging.error('==<BACKTRACK> cannot traceback to %s \t\t__from crawler.py backtrack()', state_to.get_id())
                except Exception as e:
                    logging.info('==<BACKTRACK> save diff dom : %s', str(e))
        dom_list, url, is_same = self.is_same_state_dom(state)
        return is_same

    def both_executors_backtrack(self, state, other_executor=None):
        # check if depth over max depth , time over max time
        logging.info("both exe backtrack")
        print("both exe backtrack")
        if (time.time() - self.time_start) > self.configuration.get_max_time():
            logging.info("|||| TIME OUT |||| end backtrack ")
            return
        #if urls are same, guess they are just javascript edges
        if self.executor.get_url() == state.get_url() and other_executor.get_url() == state.get_url():
            #first, just refresh for javascript button
            logging.info('==<BACKTRACK> : try refresh')
            self.executor.refresh()
            other_executor.refresh()
            #!!!!!!!!CBT
            dom_list, url, is_same = self.is_same_state_dom(state)
            if is_same:
                return True
            #if can't, try go back from history
            logging.info('==<BACKTRACK> : both exe try back_history ')
            before_url = self.executor.get_url()
            self.executor.back_history()
            print('exe back')
            dom_list, after_url, is_same = self.is_same_state_dom(state)
            if before_url == other_executor.get_url():
                other_executor.back_history()
                print('other exe back')
            if is_same:
                return True
            #if can't, try do last edge of state history
            if self.event_history:
                logging.info('==<BACKTRACK> : try last edge of state history')
                self.executor.forward_history()
                other_executor.forward_history()
                self.executor.click_event_by_edge(self.event_history[-1])
                other_executor.click_event_by_edge(self.event_history[-1])
                dom_list, url, is_same = self.is_same_state_dom(state)
                if is_same:
                    return True
        #if can't, try go through all edges
        logging.info('==<BACKTRACK> : start from base url')
        self.executor.goto_url()
        other_executor.goto_url()
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True
        edges = self.automata.get_shortest_path(state)
        for edge in edges:
            self.executor.click_event_by_edge(edge)
            other_executor.click_event_by_edge(edge)
            dom_list, url, is_same = self.is_same_state_dom(state)
            if is_same:
                return True
        #if can't, restart and try go again
        logging.info('==<BACKTRACK> : restart driver')
        edges = self.automata.get_shortest_path(state)
        self.executor.restart_app()
        self.executor.goto_url()
        other_executor.restart_app()
        other_executor.goto_url()
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True
        for edge in edges:
            self.executor.click_event_by_edge(edge)
            other_executor.click_event_by_edge(edge)
            #check again if executor really turned back. if not, sth error, stop
            state_to = self.automata.get_state_by_id(edge.get_state_to())
            dom_list, url, is_same = self.is_same_state_dom(state_to)
            if not is_same:
                try:
                    err = State(dom_list, url)
                    with open('debug/debug_origin_' + state_to.get_id() + '.txt', 'w') as f:
                        f.write(state_to.get_all_dom(self.configuration))
                    with open('debug/debug_restart_' + state_to.get_id() + '.txt', 'w') as f:
                        f.write(err.get_all_dom(self.configuration))
                    with open('debug/debug_origin_nor_' + state_to.get_id() + '.txt', 'w') as f:
                        f.write(state_to.get_all_normalize_dom(self.configuration))
                    with open('debug/debug_restart_nor_' + state_to.get_id() + '.txt', 'w') as f:
                        f.write(err.get_all_normalize_dom(self.configuration))
                    logging.error('==<BACKTRACK> cannot traceback to %s \t\t__from crawler.py backtrack()', state_to.get_id())
                except Exception as e:
                    logging.info('==<BACKTRACK> save diff dom : %s', str(e))
        dom_list, url, is_same = self.is_same_state_dom(state)
        return is_same

    #=========================================================================================
    # EVENT
    #=========================================================================================
    def make_value(self, edge):
        rand = random.randint(0, 1000)
        for input_field in edge.get_inputs():
            data_set = input_field.get_data_set(self.databank)
            #check data set
            value = data_set[rand % len(data_set)] if data_set \
                else ''.join([random.choice(string.lowercase) for i in xrange(8)])
            input_field.set_value(value)
            logging.info(" set input:%s value:%s " % (input_field.get_id(), value))

        for select_field in edge.get_selects():
            data_set = select_field.get_data_set(self.databank)
            #check data set
            selected = data_set[rand % len(data_set)] if data_set \
                else random.randint(0, len(select_field.get_value()))
            select_field.set_selected(selected)
            logging.info(" set select:%s value:%s " % (select_field.get_id(), selected))

        for checkbox_field in edge.get_checkboxes():
            data_set = checkbox_field.get_data_set(self.databank)
            #check data set
            selected_list = data_set[rand % len(data_set)].split('/') if data_set \
                else random.sample(xrange(len(checkbox_field.get_checkbox_list())),
                                   random.randint(0, len(checkbox_field.get_checkbox_list())))
            checkbox_field.set_selected_list(selected_list)
            logging.info(" set checkbox:%s value:%s " % (checkbox_field.get_checkbox_name(), str(selected_list)))

        for radio_field in edge.get_radios():
            data_set = radio_field.get_data_set(self.databank)
            #check data set
            selected = data_set[rand % len(data_set)] if data_set \
                else random.randint(0, len(radio_field.get_radio_list()))
            radio_field.set_selected(selected)
            logging.info(" set radio:%s value:%s " % (radio_field.get_radio_name(), selected))

    #=========================================================================================
    # DECISION
    #=========================================================================================
    def is_same_domain(self, url):
        base_url = urlparse(self.configuration.get_url())
        new_url = urlparse(url)
        if base_url.netloc == new_url.netloc:
            return True
        else:
            for d in self.configuration.get_domains():
                d_url = urlparse(d)
                if d_url.netloc == new_url.netloc:
                    return True
            return False

    def is_same_state_dom(self, cs):
        #cs is other executor's state, or previous state
        dom_list, url = self.executor.get_dom_list(self.configuration)
        cs_dom_list = cs.get_dom_list(self.configuration)
        if url != cs.get_url():
            return dom_list, url, False
        elif len(cs_dom_list) != len(dom_list):
            return dom_list, url, False
        else:
            for dom, cs_dom in zip(dom_list, cs_dom_list):
                if not dom == cs_dom:
                    return dom_list, url, False
        print('same dom to: ', cs.get_id())
        return dom_list, url, True

    def cbt_is_same_state_dom(self, cs):
        #cs is other executor
        dom_list, url = self.executor.get_dom_list(self.configuration)
        cs_dom_list = cs.get_dom_list(self.configuration)
        if url != cs.get_url():
            str = ("urls are different:%s v.s %s", url, cs.get_url())
            return str
        else:
            for dom, cs_dom in zip(dom_list, cs_dom_list):
                if not dom == cs_dom:
                    str = ("dom are different")
                    return str
        str = 'same dom to: ', cs.get_id()
        return str

    #=========================================================================================
    # TODO FOR MUTATION
    #=========================================================================================
    def run_mutant(self):
        self.mutation_history = []
        self.mutation_cluster = {}
        self.mutation = Mutation(self.configuration.get_mutation_trace(), self.databank)
        self.mutation_traces = self.make_mutation_traces()

        # run a default trace for compare
        logging.info(" start run default trace")
        self.executor.start()
        self.executor.goto_url()
        initial_state = self.get_initail_state()
        self.run_mutant_script(initial_state)
        self.close()

        # run all mutation traces
        logging.info(' total %d mutation traces ', len(self.mutation_traces))
        for n in xrange(len(self.mutation_traces)):
            logging.info(" start run number %d mutant trace", n)
            self.executor.start()
            self.executor.goto_url()
            initial_state = self.get_initail_state()
            self.run_mutant_script(initial_state, self.mutation_traces[n])
            self.close()

        self.save_mutation_history()

    def make_mutation_traces(self):
        self.mutation.set_method(self.configuration.get_mutation_method())
        self.mutation.set_modes(self.configuration.get_mutation_modes())
        self.mutation.make_data_set()
        self.mutation.make_mutation_traces()
        # use an int to select a sample of mutation traces
        mutation_traces = self.mutation.get_mutation_traces()
        #mutation_traces = random.sample( mutation_traces,
        #    min( self.configuration.get_max_mutation_traces(), len(mutation_traces) ) )
        return mutation_traces

    def run_mutant_script(self, prev_state, mutation_trace=None):
        depth = 0
        edge_trace = []
        state_trace = [prev_state]
        # use -1 to mark the default trace
        cluster_value = prev_state.get_id() if mutation_trace else "-1" + prev_state.get_id()
        for edge in self.configuration.get_mutation_trace():
            new_edge = edge.get_copy()
            new_edge.set_state_from(prev_state.get_id())
            if mutation_trace:
                self.make_mutant_value(new_edge, mutation_trace[depth])
            self.executor.click_event_by_edge(new_edge)
            self.event_history.append(new_edge)
            dom_list, url, is_same = self.is_same_state_dom(prev_state)
            if not is_same:
                logging.info(' change dom to: %s', url)
                # check if this is a new state
                temp_state = State(dom_list, url)
                new_state, is_newly_added = self.automata.add_state(temp_state)
                self.automata.add_edge(new_edge, new_state.get_id())
                # save this click edge
                prev_state.add_clickable(edge.get_clickable(), new_edge.get_iframe_list())
                if is_newly_added:
                    logging.info(' add new state %s of: %s', new_state.get_id(), url)
                    self.automata.save_state(new_state, depth + 1)
                    self.automata.save_state_shot(self.executor, new_state)
                self.automata.change_state(new_state)
                # save the state, edge
                state_trace.append(new_state)
                edge_trace.append(new_edge)
                cluster_value += new_state.get_id()
                # prepare for next edge
                prev_state = new_state
            depth += 1
        self.mutation_history.append((edge_trace, state_trace, cluster_value))
        logging.warning([c for e, s, c in self.mutation_history])

    def cluster_mutation_trace(self):
        # then cluster other mutation traces
        for edge_trace, state_trace, cluster_value in self.mutation_history:
            if cluster_value in self.mutation_cluster:
                self.mutation_cluster[cluster_value].append((edge_trace, state_trace))
            else:
                self.mutation_cluster[cluster_value] = [(edge_trace, state_trace)]

    def save_mutation_history(self):
        self.cluster_mutation_trace()
        traces_data = {
            'method': self.configuration.get_mutation_method(),
            'traces': []
        }
        for cluster_key, mutation_traces in self.mutation_cluster.items():
            for edge_trace, state_trace in mutation_traces:
                trace_data = {
                    'edges': [],
                    'states': [],
                    'cluster_value': cluster_key
                }
                for edge in edge_trace:
                    trace_data['edges'].append(edge.get_edge_json())
                for state in state_trace:
                    trace_data['states'].append(state.get_simple_state_json(self.configuration))
                if cluster_key.startswith('-1'):
                    traces_data['traces'].insert(0, trace_data)
                else:
                    traces_data['traces'].append(trace_data)
        with codecs.open(os.path.join(self.configuration.get_abs_path('root'), 'mutation_traces.json'),
                         'w', encoding='utf-8') as f:
            json.dump(traces_data, f, indent=2, sort_keys=True, ensure_ascii=False)
class GeneticAlgorithm(object):
    def __init__(self, population_size, sample_genotype, crossover_rate=0.6,
                 mutation_rate=0.2, maximize=True):
        self.population_size = population_size
        self.genotype = sample_genotype
        self.crossover_rate = crossover_rate
        self.mutation_rate = mutation_rate
        self.selector = RankSelector(maximize)
        self.crossover = OnePointCrossover()
        self.mutation = Mutation()
        self.generations = []
        self.maximize = maximize

    def evolve(self, fitness_obj=FitnessFunction, num_generations=10):
        # initialize population
        population = []
        for _ in range(self.population_size):
            chromosome = self.genotype.create_random_instance()
            population.append(chromosome)

        # process each generation
        for _ in range(num_generations):
            # track generations
            self.generations.append(population)
            next_population = []

            # calculate fitness for population
            for chromosome in population:
                chromosome.fitness = fitness_obj.evaluate(chromosome)

            # select parents for generation
            parents = self.selector.select_pairs(population=population)

            # perform crossover
            for parent in parents:
                do_crossover = random.random() < self.crossover_rate
                if do_crossover:
                    child_1, child_2 = self.crossover.recombine(parent[0].genes, parent[1].genes)
                    chrom_child_1 = Chromosome(genes=child_1)
                    chrom_child_2 = Chromosome(genes=child_2)
                    # add new children to next population
                    next_population.append(chrom_child_1)
                    next_population.append(chrom_child_2)
                else:
                    # no crossover, add parents as is
                    next_population.append(parent[0])
                    next_population.append(parent[1])

            # do mutation
            do_mutation = random.random() < self.mutation_rate
            if do_mutation:
                next_population = self.mutation.mutate(self.genotype, next_population)

            population = next_population

        # calculate fitness for last generation
        for chromosome in population:
            chromosome.fitness = fitness_obj.evaluate(chromosome)
        return population

    def best_individual(self, population):
        population.sort(key=lambda x: x.fitness, reverse=self.maximize)
        best_individual = population[0]
        fittest = dict()
        for i in range(len(best_individual.genes)):
            fittest[self.genotype.get_label_at(i)] = best_individual.genes[i]
        return fittest
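# A minimal usage sketch of the GeneticAlgorithm class above. It assumes a genotype object
# providing create_random_instance()/get_label_at() and the FitnessFunction used as the
# default evaluate() provider exist in this project; `sample_genotype` and the parameter
# values here are illustrative only.
ga = GeneticAlgorithm(population_size=50, sample_genotype=sample_genotype,
                      crossover_rate=0.6, mutation_rate=0.2, maximize=True)
final_population = ga.evolve(fitness_obj=FitnessFunction, num_generations=20)
print(ga.best_individual(final_population))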
def __init__(self, algorithm_config):
    start = time.time()
    variable_config = algorithm_config
    tab_epoch = []
    tab_mean = []
    tab_std = []
    tab_mean2 = []
    tab_std2 = []
    tab_epoch2 = []
    tab_elitary = []
    tab_epoch3 = []
    tab_epoch4 = []
    tab_elitary2 = []

    # chrom = Chromosome(variable_config)
    # chrom = chrom.initialChromosome(variable_config)
    # print(chrom)

    # generate the population
    pop = Population(variable_config)
    popul = pop.initialPopulation(variable_config)
    # print(popul)

    randomBinSol = evaluateFitness(variable_config)
    # individual decision variables
    decision_variables = randomBinSol.decVariables(variable_config, popul)
    # print(decision_variables)
    # decode to decimal equivalents
    decimal = randomBinSol.decoding(decision_variables, variable_config)
    # print(decimal)
    # convert to real-valued variables
    real = randomBinSol.realVariables(variable_config, decimal)
    # print(real)
    # compute the function value for each column
    func_solution = randomBinSol.funcSolution(variable_config, real)
    # print(func_solution)

    tab_mean.append(np.mean(func_solution))
    tab_std.append(np.std(func_solution))
    tab_epoch.append(0)

    counter = 1
    countEp = 0
    selType = SelectionType(variable_config)
    while counter < variable_config.T:
        selection = selType.selType(variable_config, func_solution, popul)

        # CROSSOVER
        # print("CROSSOVER")
        crosov = CrossingOver(variable_config)
        crosingover = crosov.crosingOver(variable_config, selection)
        # print(crosingover)

        # MUTATION
        # print("MUTATION")
        mutat = Mutation(variable_config)
        mutation = mutat.mutation(variable_config, crosingover)
        # print(mutation)

        # INVERSION
        # print("INVERSION")
        inver = Inversion(variable_config)
        inversion = inver.inversion(variable_config, mutation)
        # print(inversion)

        offspring = np.asarray(inversion)
        # print(offspring)

        decision_variables2 = randomBinSol.decVariables(variable_config, offspring)
        # print(decision_variables2)
        # decode to decimal equivalents
        decimal2 = randomBinSol.decoding(decision_variables2, variable_config)
        # print(decimal2)
        # convert to real-valued variables
        real2 = randomBinSol.realVariables(variable_config, decimal2)
        # print(real2)
        # compute the function value for each column
        func_solution2 = randomBinSol.funcSolution(variable_config, real2)

        combin = Combinate()
        combination = combin.newPopulation(popul, offspring, func_solution, func_solution2, variable_config)
        popul = combination[0]
        func_solution = combination[1]
        best = combination[2]

        tab_mean.append(np.mean(func_solution))
        tab_std.append(np.std(func_solution))
        tab_epoch.append(counter)
        tab_elitary.append(best)
        tab_epoch3.append(countEp)
        print(tab_elitary)
        if counter != 1:
            tab_mean2.append(np.mean(func_solution))
            tab_std2.append(np.std(func_solution))
            tab_epoch2.append(counter)
        if countEp != 0:
            tab_epoch4.append(countEp)
            tab_elitary2.append(best)
        counter += 1
        countEp += 1

    end = time.time()
    print("elapsed time")
    el_time = (end - start)

    self.__el_time = el_time
    self.__tab_mean = tab_mean
    self.__tab_std = tab_std
    self.__tab_epoch = tab_epoch
    self.__tab_mean2 = tab_mean2
    self.__tab_std2 = tab_std2
    self.__tab_epoch2 = tab_epoch2
    self.__tab_elitary = tab_elitary
    self.__tab_epoch3 = tab_epoch3
    self.__tab_elitary2 = tab_elitary2
    self.__tab_epoch4 = tab_epoch4
pop_fsum = pop_fsum + fitness_function.sum_all_fitness()
population[i] = fitness_function.chromosome

parents = []
for i in range(2):  #select 2 chromosomes for crossover
    parent = RouletteSelection(population).do_selection()
    parents.append(parent)

point = choice(range(0, len(parents[0]), 2))
children = OnePoint().exe(parents[0], parents[1], point)

probability = 100
mutated_children = []
for i in range(2):  #mutate offspring
    mutated_children.append(Mutation(children[i], probability).exe())

i = 0
for chromosome in population:  #replace current population with new one
    if str(chromosome) == str(parents[0]):
        population[i] = mutated_children[0]
        i = i + 1
    elif str(chromosome) == str(parents[1]):
        population[i] = mutated_children[1]
        i = i + 1
    else:
        i = i + 1

#get list of fitness for each individual
fsums = []
for i in range(p):
from molecule import Molecule
from mutation import Mutation
from utils import connect
import random
import os

if __name__ == "__main__":
    mol = Molecule("examples/DA.xyz")
    ewg_dir = "mutations/EWG/"
    edg_dir = "mutations/EDG/"
    ewg_files = [Mutation(ewg_dir + f) for f in os.listdir(ewg_dir)]
    edg_files = [Mutation(edg_dir + f) for f in os.listdir(edg_dir)]
    print(ewg_files)
    print(edg_files)

    mol.bonds = [(1, 7), (2, 8)]
    #mol.bonds = [(6,12),(3,9)]

    unique_structures = []
    for i in range(50):
        print('Generating', i, '...')
        # Which n mutations?
constants = get_constants(lower=-10, upper=10, bit=True)
ppl.createPopulation(functions=functions, constants=constants, n_ind=n_ind,
                     n_gene=n_gene, n_register=n_register)

eval_function = lambda x: x[0] ^ x[1]  # xor
ppl.excute_all(inputs, eval_function)

# revolution
p = ProgressBar(0, revolution)
for i in range(revolution):
    #print("revolution: ", i)
    p.update(i + 1)
    elite = Selection.elite(ppl, elite_size)
    new_p = copy.deepcopy(elite)
    for j in range(n_ind - elite_size):
        parent = Selection.tournament(ppl, tourn_size)
        elite.append(parent)
        child = Crossover.randomPoints(elite, cross_rate)
        child = Mutation.mutation(child, mutate_rate, n_register, functions, constants)
        new_p.append(child)
    ppl.setPopulation(new_p)
    ppl.excute_all(inputs, eval_function)
    if (i % 100) == 0:
        ppl.result()

ppl.result()
ppl.write_result(path)
p.finish()
class SeleniumCrawler(Crawler):
    def __init__(self, configuration, executor, automata, databank, algorithm):
        self.configuration = configuration
        self.executor = executor
        self.automata = automata
        self.databank = databank
        #ALGO
        self.algorithm = algorithm
        self.algorithm.set_utility(self, configuration, executor, automata)
        #list of event:(state, clickable, inputs, selects, iframe_list)
        self.event_history = []

    def run(self):
        #start time
        self.time_start = time.time()
        self.executor.start()
        self.executor.goto_url()
        initial_state = self.get_initail_state()
        self.run_script_before_crawl(initial_state)
        self.crawl(1)
        return self.automata

    def run_algorithm(self):
        # repeat for trace_amount times
        for i in range(self.configuration.get_trace_amount()):
            self.initial()
            while self.action_events:
                #check time
                if (time.time() - self.time_start) > self.configuration.get_max_time():
                    logging.info("|||| TIME OUT |||| end crawl ")
                    break
                string = ''.join([str(action['action']['clickable'].get_id()) + str(action['depth']) + str(action['state'].get_id())
                                  for action in self.action_events])
                logging.info(' action_events : ' + string)
                state, action, depth = self.get_next_action()
                self.change_state(state, action, depth)
                edge = self.trigger_action(state, action, depth)
                self.update_states(state, edge, action, depth)
        self.close()
        self.algorithm.save_traces()
        self.automata.save_automata(self.configuration.get_automata_fname())
        Visualizer.generate_html(
            'web',
            os.path.join(self.configuration.get_path('root'),
                         self.configuration.get_automata_fname()))
        return self.automata

    def initial(self):
        self.action_events = []
        #start time
        self.time_start = time.time()
        self.algorithm.prepare()
        current_state = self.automata.get_current_state()
        self.add_new_events(current_state, None, 0)

    def close(self):
        self.algorithm.end()
        self.executor.close()

    def get_next_action(self):
        event = self.algorithm.get_next_action(self.action_events)
        return event['state'], event['action'], event['depth']

    def change_state(self, state, action, depth):
        current_state = self.automata.get_current_state()
        if current_state != state:
            self.algorithm.change_state(state, action, depth)
            logging.info(' now depth(%s) - max_depth(%s); current state: %s',
                         depth, self.configuration.get_max_depth(), state.get_id())

    def trigger_action(self, state, action, depth):
        inputs = state.get_copy_inputs(action['iframe_key'])
        selects = state.get_copy_selects(action['iframe_key'])
        checkboxes = state.get_copy_checkboxes(action['iframe_key'])
        radios = state.get_copy_radios(action['iframe_key'])
        new_edge = Edge(state.get_id(), None, action['clickable'], inputs, selects,
                        checkboxes, radios, action['iframe_key'])
        self.algorithm.trigger_action(state, new_edge, action, depth)
        return new_edge

    def update_states(self, current_state, new_edge, action, depth):
        dom_list, url, is_same = self.is_same_state_dom(current_state)
        if is_same:
            self.algorithm.update_with_same_state(current_state, new_edge, action, depth, dom_list, url)
        if self.is_same_domain(url):
            logging.info(' |depth:%s state:%s| change dom to: %s',
                         depth, current_state.get_id(), self.executor.get_url())
            # check if this is a new state
            temp_state = State(dom_list, url)
            new_state, is_newly_added = self.automata.add_state(temp_state)
            self.automata.add_edge(new_edge, new_state.get_id())
            # save this click edge
            current_state.add_clickable(action['clickable'], action['iframe_key'])
            self.automata.change_state(new_state)
            # depth GO ON
            depth += 1
            self.event_history.append(new_edge)
            if is_newly_added:
                self.algorithm.update_with_new_state(current_state,
                                                     new_state, new_edge, action, depth, dom_list, url)
            else:
                self.algorithm.update_with_old_state(current_state, new_state, new_edge, action, depth, dom_list, url)
        else:
            self.algorithm.update_with_out_of_domain(current_state, new_edge, action, depth, dom_list, url)

    def add_new_events(self, state, prev_state, depth):
        self.algorithm.add_new_events(state, prev_state, depth)

    #=========================================================================================
    # BASIC CRAWL
    #=========================================================================================
    def get_initail_state(self):
        logging.info(' get initial state')
        dom_list, url = self.executor.get_dom_list(self.configuration)
        initial_state = State(dom_list, url)
        is_new, state = self.automata.set_initial_state(initial_state)
        if is_new:
            self.automata.save_state(self.executor, initial_state, 0)
            self.automata.save_state_shot(self.executor, initial_state)
        else:
            self.automata.change_state(state)
        time.sleep(self.configuration.get_sleep_time())
        return state

    def run_script_before_crawl(self, prev_state):
        for edge in self.configuration.get_before_script():
            self.executor.click_event_by_edge(edge)
            self.event_history.append(edge)
            dom_list, url, is_same = self.is_same_state_dom(prev_state)
            if is_same:
                continue
            logging.info(' change dom to: %s', self.executor.get_url())
            # check if this is a new state
            temp_state = State(dom_list, url)
            new_state, is_newly_added = self.automata.add_state(temp_state)
            self.automata.add_edge(edge, new_state.get_id())
            # save this click edge
            prev_state.add_clickable(edge.get_clickable(), edge.get_iframe_list())
            if is_newly_added:
                logging.info(' add new state %s of: %s', new_state.get_id(), url)
                self.automata.save_state(new_state, 0)
                self.automata.save_state_shot(self.executor, new_state)
            self.automata.change_state(new_state)
            prev_state = new_state

    #=============================================================================================
    # BACKTRACK
    #=============================================================================================
    def executor_backtrack(self, state, *executors):
        # check if depth over max depth , time over max time
        if (time.time() - self.time_start) > self.configuration.get_max_time():
            logging.info("|||| TIME OUT |||| end backtrack ")
            return
        #if urls are same, guess they are just javascript edges
        if executors[0].get_url() == state.get_url():
            #first, just refresh for javascript button
            logging.info('==<BACKTRACK> : try refresh')
            for exe in executors:
                exe.refresh()
            dom_list, url, is_same = self.is_same_state_dom(state)
            if is_same:
                return True
            #if can't, try go back from history
            logging.info('==<BACKTRACK> : try back_history ')
            for exe in executors:
                exe.back_history()
            dom_list, url, is_same = self.is_same_state_dom(state)
            if is_same:
                return True
            logging.info('==<BACKTRACK> : try back_script ')
            for exe in executors:
                exe.back_script()
            dom_list, url, is_same = self.is_same_state_dom(state)
            if is_same:
                return True
            #if can't, try do last edge of state history
            if self.event_history:
                logging.info('==<BACKTRACK> : try last edge of state history')
                for exe in executors:
                    exe.forward_history()
                    exe.click_event_by_edge(self.event_history[-1])
                dom_list, url, is_same = self.is_same_state_dom(state)
                if is_same:
                    return True
        #if can't, try go through all edges
        logging.info('==<BACKTRACK> : start from base url')
        for exe in executors:
            exe.goto_url()
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True
        for edge in self.automata.get_shortest_path(state):
            for exe in executors:
exe.click_event_by_edge(edge) dom_list, url, is_same = self.is_same_state_dom(state) if is_same: return True # if that fails, restart the driver and try again logging.info('==<BACKTRACK> : restart driver') for exe in executors: exe.restart_app() exe.goto_url() dom_list, url, is_same = self.is_same_state_dom(state) if is_same: return True for edge in self.automata.get_shortest_path(state): for exe in executors: exe.click_event_by_edge(edge) # check again whether the executor really went back; if not, something is wrong, so stop and dump debug doms state_to = self.automata.get_state_by_id(edge.get_state_to()) dom_list, url, is_same = self.is_same_state_dom(state_to) if not is_same: try: debug_dir = os.path.join( self.configuration.get_abs_path('dom'), state.get_id(), 'debug') if not os.path.isdir(debug_dir): os.makedirs(debug_dir) err = State(dom_list, url) with codecs.open(os.path.join( debug_dir, 'debug_origin_' + state_to.get_id() + '.txt'), 'w', encoding='utf-8') as f: f.write(state_to.get_all_dom(self.configuration)) with codecs.open(os.path.join( debug_dir, 'debug_restart_' + state_to.get_id() + '.txt'), 'w', encoding='utf-8') as f: f.write(err.get_all_dom(self.configuration)) with codecs.open(os.path.join( debug_dir, 'debug_origin_nor_' + state_to.get_id() + '.txt'), 'w', encoding='utf-8') as f: f.write( state_to.get_all_normalize_dom(self.configuration)) with codecs.open(os.path.join( debug_dir, 'debug_restart_nor_' + state_to.get_id() + '.txt'), 'w', encoding='utf-8') as f: f.write(err.get_all_normalize_dom(self.configuration)) logging.error( '==<BACKTRACK> cannot traceback to %s \t\t__from crawler.py backtrack()', state_to.get_id()) except Exception as e: logging.info('==<BACKTRACK> save diff dom : %s', str(e)) dom_list, url, is_same = self.is_same_state_dom(state) return is_same #========================================================================================= # EVENT #========================================================================================= def make_value(self, edge): rand = random.randint(0, 1000) for input_field in edge.get_inputs(): data_set = input_field.get_data_set(self.databank) # pick from the data set if there is one, otherwise use a random 8-letter string value = data_set[ rand % len(data_set) ] if data_set else ''.join( [random.choice('abcdefghijklmnopqrstuvwxyz') for i in range(8)] ) input_field.set_value(value) logging.info(" set input:%s value:%s " % (input_field.get_id(), value)) for select_field in edge.get_selects(): data_set = select_field.get_data_set(self.databank) # pick from the data set if there is one, otherwise pick a random option selected = data_set[ rand % len(data_set) ] if data_set else random.randint(0, len(select_field.get_value())) select_field.set_selected(selected) logging.info(" set select:%s value:%s " % (select_field.get_id(), selected)) for checkbox_field in edge.get_checkboxes(): data_set = checkbox_field.get_data_set(self.databank) # pick from the data set if there is one, otherwise tick a random subset selected_list = data_set[ rand % len(data_set) ].split('/') if data_set else random.sample( range(len(checkbox_field.get_checkbox_list())), random.randint(0, len(checkbox_field.get_checkbox_list())) ) checkbox_field.set_selected_list(selected_list) logging.info( " set checkbox:%s value:%s " % (checkbox_field.get_checkbox_name(), str(selected_list))) for radio_field in edge.get_radios(): data_set = radio_field.get_data_set(self.databank) # pick from the data set if there is one, otherwise pick a random radio option selected = data_set[ rand % len(data_set) ] if data_set else random.randint(0, len(radio_field.get_radio_list())) radio_field.set_selected(selected) logging.info(" set radio:%s value:%s " % (radio_field.get_radio_name(), selected)) 
#========================================================================================= # DECISION #========================================================================================= def is_same_domain(self, url): base_url = urlparse(self.configuration.get_url()) new_url = urlparse(url) if base_url.netloc == new_url.netloc: return True else: for d in self.configuration.get_domains(): d_url = urlparse(d) if d_url.netloc == new_url.netloc: return True return False def is_same_state_dom(self, cs): dom_list, url = self.executor.get_dom_list(self.configuration) cs_dom_list = cs.get_dom_list(self.configuration) if url != cs.get_url(): return dom_list, url, False elif len(cs_dom_list) != len(dom_list): return dom_list, url, False else: for dom, cs_dom in zip(dom_list, cs_dom_list): if dom != cs_dom: return dom_list, url, False logging.info('same dom to: %s', cs.get_id()) return dom_list, url, True #========================================================================================= # TODO FOR MUTATION #========================================================================================= def run_mutant(self): self.mutation_history = [] self.mutation_cluster = {} self.mutation = Mutation(self.configuration.get_mutation_trace(), self.databank) self.mutation_traces = self.make_mutation_traces() # run a default trace for comparison logging.info(" start run default trace") self.executor.start() self.executor.goto_url() initial_state = self.get_initail_state() self.run_mutant_script(initial_state) self.close() # run all mutation traces logging.info(' total %d mutation traces ', len(self.mutation_traces)) for n in xrange(len(self.mutation_traces)): logging.info(" start run number %d mutant trace", n) self.executor.start() self.executor.goto_url() initial_state = self.get_initail_state() self.run_mutant_script(initial_state, self.mutation_traces[n]) self.close() self.save_mutation_history() def make_mutation_traces(self): self.mutation.set_method(self.configuration.get_mutation_method()) self.mutation.set_modes(self.configuration.get_mutation_modes()) self.mutation.make_data_set() self.mutation.make_mutation_traces() # optionally take a sample of the mutation traces (see the commented-out random.sample below) mutation_traces = self.mutation.get_mutation_traces() #mutation_traces = random.sample( mutation_traces, # min( self.configuration.get_max_mutation_traces(), len(mutation_traces) ) ) return mutation_traces def run_mutant_script(self, prev_state, mutation_trace=None): depth = 0 edge_trace = [] state_trace = [prev_state] # a "-1" prefix marks the default (non-mutated) trace cluster_value = prev_state.get_id() if mutation_trace else "-1" + prev_state.get_id() for edge in self.configuration.get_mutation_trace(): new_edge = edge.get_copy() new_edge.set_state_from(prev_state.get_id()) if mutation_trace: self.make_mutant_value(new_edge, mutation_trace[depth]) self.executor.click_event_by_edge(new_edge) self.event_history.append(new_edge) dom_list, url, is_same = self.is_same_state_dom(prev_state) if not is_same: logging.info(' change dom to: %s', url) # check if this is a new state temp_state = State(dom_list, url) new_state, is_newly_added = self.automata.add_state(temp_state) self.automata.add_edge(new_edge, new_state.get_id()) # save this click edge prev_state.add_clickable(edge.get_clickable(), new_edge.get_iframe_list()) if is_newly_added: logging.info(' add new state %s of: %s', new_state.get_id(), url) self.automata.save_state(new_state, depth + 1) self.automata.save_state_shot(self.executor, new_state) self.automata.change_state(new_state) # save the state, edge 
state_trace.append(new_state) edge_trace.append(new_edge) cluster_value += new_state.get_id() # prepare for next edge prev_state = new_state depth += 1 self.mutation_history.append((edge_trace, state_trace, cluster_value)) logging.warning([c for e, s, c in self.mutation_history]) def cluster_mutation_trace(self): #then cluster other mutation traces for edge_trace, state_trace, cluster_value in self.mutation_history: if cluster_value in self.mutation_cluster: self.mutation_cluster[cluster_value].append( (edge_trace, state_trace)) else: self.mutation_cluster[cluster_value] = [(edge_trace, state_trace)] def save_mutation_history(self): self.cluster_mutation_trace() traces_data = { 'method': self.configuration.get_mutation_method(), 'traces': [] } for cluster_key, mutation_traces in self.mutation_cluster.items(): for edge_trace, state_trace in mutation_traces: trace_data = { 'edges': [], 'states': [], 'cluster_value': cluster_key } for edge in edge_trace: trace_data['edges'].append(edge.get_edge_json()) for state in state_trace: trace_data['states'].append( state.get_simple_state_json(self.configuration)) if cluster_key.startswith('-1'): traces_data['traces'].insert(0, trace_data) else: traces_data['traces'].append(trace_data) with codecs.open(os.path.join(self.configuration.get_abs_path('root'), 'mutation_traces.json'), 'w', encoding='utf-8') as f: json.dump(traces_data, f, indent=2, sort_keys=True, ensure_ascii=False)
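# Usage sketch for the SeleniumCrawler class above. This is illustrative only: the
# crawl_and_mutate wrapper and its parameter names are assumptions, not part of the
# project; only the SeleniumCrawler constructor, run_algorithm() and run_mutant()
# calls come from the class itself.
def crawl_and_mutate(configuration, executor, automata, databank, algorithm):
    # systematic crawl driven by the pluggable algorithm object
    crawler = SeleniumCrawler(configuration, executor, automata, databank, algorithm)
    result_automata = crawler.run_algorithm()
    # then replay the configured mutation trace with mutated form values
    crawler.run_mutant()
    return result_automata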
#from autots import Molecule #from autots import Mutation #from autots import connect from molecule import Molecule from mutation import Mutation from utils import connect import random if __name__ == "__main__": mol = Molecule("examples/diels-alder.xyz") muts1 = [ Mutation("mutations/8.xyz"), #Mutation("mutations/cn.xyz"), #Mutation("mutations/cooh.xyz"), #Mutation("mutations/nh2.xyz"), #Mutation("mutations/oh.xyz") ] unique_structures = [] for i in range(100): print('Generating', i, '...') # How many mutations? #n = random.randint(1, 3) n = 6 # Which n bonds?
# num = random.randint(0, numOfLuckyFew) # isInList(which_lucky_few, num, numOfLuckyFew) # which_lucky_few.append(num) # #for x in range(numOfLuckyFew): # lucky_few.append(temp_population[which_lucky_few[x]]) # #print("The lucky few that will survive are: " + str(lucky_few)) breeders = Breeders(population, password, best_sample, lucky_few) print("The next generation is: ") print(breeders.selectFromPopulation(breeders.computePerfPopulation(population, password), best_sample, lucky_few)) print("Time for reproduction...") reproduction = Reproduction() # TODO print("What chance of mutation?") chance_of_mutation = int(input()) mutation = Mutation(population, chance_of_mutation) population = mutation.mutatePopulation(population, chance_of_mutation) print("The new population is: ") print(population) #Fitness = Fitness() #Reproduction = Reproduction()
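# A minimal sketch of the mutation step driven above, assuming individuals are plain
# strings and chance_of_mutation is a percentage. This stand-in is for illustration;
# the real Mutation.mutatePopulation implementation is not shown in this snippet.
import random
import string

def mutate_population(population, chance_of_mutation):
    mutated = []
    for word in population:
        if random.randint(1, 100) <= chance_of_mutation:
            index = random.randrange(len(word))
            # replace one random character with a random lowercase letter
            word = word[:index] + random.choice(string.ascii_lowercase) + word[index + 1:]
        mutated.append(word)
    return mutated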
p = Population(folder) # train initial individuals for i in range(len(p)): if p.individuals[i].accuracy == 0: a = p.individuals[i] acc = worker.test(a) p.individuals[i].accuracy = acc a.accuracy = acc print("train", folder.file_name(p.individuals[i]), "to", acc) folder.create_file(a.to_proto()) # begin evolution while len(folder.history) < C: print("This is", len(folder.history) - len(p) + 1, "cycle:") sample = random.sample(range(len(p)), k=S) parent = Arch() for s in sample: if p.individuals[s].accuracy > parent.accuracy: parent = copy(p.individuals[s]) child = copy(parent) # mutation Mutation.hidenStateMutate(child) acc = worker.test(child) child.accuracy = acc print("train child to", acc) p.add(child) p.dead()
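# Note on the loop above: it follows the aging-evolution pattern -- sample S
# individuals, copy the fittest of the sample as the parent, mutate the copy, train
# and add the child, then remove one member of the population. A plausible sketch of
# the removal step, assuming Population.dead() evicts the oldest individual (an
# assumption; the real Population class is not shown here):
#
#     def dead(self):
#         self.individuals.pop(0)  # individuals kept in insertion order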
import random import numpy as np from math import sin, cos, pi from population import Population from selection import Selection from crossover import Crossover from mutation import Mutation from toolkits import GAEngine f = lambda x, y: y * sin(2 * pi * x) + x * cos(2 * pi * y) population = Population(100, [-2, 2], [-2, 2]).init() selection = Selection(f, 100) crossover = Crossover(pe=0.5) mutation = Mutation([-2, 2], [-2, 2], pm=0.5) engine = GAEngine(population, selection, crossover, mutation) if '__main__' == __name__: engine.run(200)
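# Quick sanity check of the fitness surface before a long run. The helper below is
# illustrative and not part of the GAEngine API; it only evaluates the f defined
# above at a known point:
# f(0.25, 1.0) = 1.0 * sin(pi / 2) + 0.25 * cos(2 * pi) = 1.0 + 0.25 = 1.25
def check_fitness_surface():
    expected = 1.25
    assert abs(f(0.25, 1.0) - expected) < 1e-9
    return expected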
def __mutation(self): mutator = Mutation(self.__childs, self.mutation_probability) mutator.perform()
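# A minimal sketch of a mutation operator with the same interface as the one used
# above (constructor takes the children and a mutation probability, perform()
# mutates them in place). The bit-flip strategy and the class name are assumptions
# for illustration; the project's actual Mutation class is not shown.
import random

class BitFlipMutation(object):
    def __init__(self, childs, mutation_probability):
        self.childs = childs                      # list of gene lists (0/1 values)
        self.mutation_probability = mutation_probability

    def perform(self):
        for child in self.childs:
            for i in range(len(child)):
                if random.random() < self.mutation_probability:
                    child[i] = 1 - child[i]       # flip the gene in place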
def update(self, opt, select_pressure, mutagenic_pressure, t_curr, prolif_adj, all_muts): """Update this clone and its children for one time step.""" if self.is_dead_end(): return ( 0, 0, 0, 0, ) new_pop_size = new_sub_count = new_mut_agg = new_pro_agg = 0 if not self.is_dead(): if self.is_resistant: # warning: as currently implemented, the value of # resist_strength is not actually guaranteed to be # in the interval [0,1] eff_pressure = select_pressure * (1.0 - self.resist_strength) effective_prolif = self.prolif_rate - prolif_adj - eff_pressure effective_mut = self.mut_rate else: effective_prolif = self.prolif_rate - prolif_adj - select_pressure if mutagenic_pressure: effective_mut = self.mut_rate * mutagenic_pressure else: effective_mut = self.mut_rate # sample for cell death, division and mutation # note that we sample for both division AND death before # updating the clone size. This means that a 'cell' # can reproduce and die in the same cycle # (i.e. if cells_dead + cells_new > initial_size) cells_new = safe_binomial_sample(self.size, effective_prolif) cells_dead = safe_binomial_sample(self.size, self.death_rate) self.size = self.size + cells_new - cells_dead # this is the total number of mutations this cycle, # not necessarily number of new subclones to spawn new_mutns = safe_binomial_sample(cells_new, effective_mut) else: # clone is dead - update its attributes # if it died on the previous cycle if self.size < 0: self.size = 0 if not self.d_time: self.d_time = t_curr # update child nodes - whether or not clone is alive for node in self.nodes: node_results = node.update(opt, select_pressure, mutagenic_pressure, t_curr, prolif_adj, all_muts) node_pop, node_sub_count, node_mut_agg, node_pro_agg = node_results new_pop_size += node_pop new_sub_count += node_sub_count new_mut_agg += node_mut_agg new_pro_agg += node_pro_agg # check again whether clone is alive; # clones which have died in this update # will now register as dead if not self.is_dead(): for _i in xrange(new_mutns): new_mutn = Mutation(opt, t_curr, all_muts) if self.is_neutral_mutn(new_mutn): new_mutn.classify_neutral(all_muts) new_mutn.original_clone = self self.num_neutral_mutns += 1 else: self.new_child(t_curr, opt, new_mutn) self.size -= 1 new_sub_count += 1 new_pop_size += 1 # finally, add this clone's stats to the return values new_pop_size += self.size new_sub_count += 1 # return aggregate prolif and mut rates, ignoring # selective and mutagenic pressure new_mut_agg += self.mut_rate * self.size new_pro_agg += (self.prolif_rate - prolif_adj) * self.size return new_pop_size, new_sub_count, new_mut_agg, new_pro_agg
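# A plausible sketch of the safe_binomial_sample helper used above (an assumption;
# the real implementation is not shown). The "safe" part is taken to mean clamping
# the per-cell probability into [0, 1], since effective rates can be pushed negative
# by selection pressure or above 1 by mutagenic pressure.
import numpy as np

def safe_binomial_sample(n, p):
    if n <= 0:
        return 0
    p = min(max(p, 0.0), 1.0)  # clamp into a valid probability range
    return int(np.random.binomial(n, p))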