def make_initial_population(COMM = None, toolbox = None, n = None):
    """Build the starting GA population on the root rank.

    Only rank 0 constructs individuals; every other rank gets None so the
    caller can scatter/broadcast later.

    COMM    -- MPI communicator (or None for serial runs)
    toolbox -- DEAP-style toolbox providing population(n=...)
    n       -- number of individuals to create
    Returns the new population on rank 0, None elsewhere.
    """
    rank = get_rank(COMM)
    if rank != 0:
        return None
    print('\t{} Core {} Building initial population'.format(get_time(), rank))
    return toolbox.population(n = n)
def calculate_statistics(COMM = None, generation = None, population = None,
                         stats_path = 'stats.out'):
    """Append per-generation fitness statistics to a tab-separated file.

    Runs on the root rank only; other ranks return immediately.

    COMM       -- MPI communicator (or None for serial runs)
    generation -- generation index written as the first column
    population -- individuals whose fitnesses are summarized
    stats_path -- output file (default 'stats.out'; previously hard-coded,
                  now a backward-compatible parameter)

    Columns written: generation, mean, std, min, max.
    """
    rank = get_rank(COMM)
    if rank != 0:
        return
    print('\t{} Core {} Calculating statistics'.format(get_time(), rank))
    fitnesses = get_fitnesses(population)
    avg = np.mean(fitnesses)
    sd = np.std(fitnesses)
    min_val = np.min(fitnesses)
    max_val = np.max(fitnesses)
    # Append so one file accumulates the whole run's history.
    with open(stats_path, 'a') as f_ptr:
        f_ptr.write('{}\t{}\t{}\t{}\t{}\n'.format(generation, avg, sd,
                                                  min_val, max_val))
def mutate_offspring(COMM = None, toolbox = None, population = None):
    """Mutate every individual in place on the root rank.

    Each mutated individual's cached fitness is invalidated (deleted) so it
    will be re-evaluated. Non-root ranks return None.
    """
    rank = get_rank(COMM)
    if rank != 0:
        return None
    print('\t{} Core {} Mutating offspring'.format(get_time(), rank))
    for individual in population:
        toolbox.mutate(individual)
        # Fitness is stale after mutation; drop it so evaluation re-runs.
        del individual.fitness.values
    return population
def evaluate_population(COMM = None, toolbox = None, population = None):
    """Evaluate fitness of every individual, farming work out over MPI.

    Rank 0 splits the population into `size` chunks, scatters them, each
    rank evaluates its chunk, results are gathered back on rank 0 and the
    fitness values are written onto the original individuals.

    COMM    -- MPI communicator; None means evaluate everything serially
    toolbox -- provides evaluate(individual) -> fitness values
    Returns the population with fitnesses assigned on rank 0, None elsewhere.
    """
    rank = get_rank(COMM)
    size = get_size(COMM)
    if rank == 0:
        # Split job indices into one chunk per rank, then slice the
        # population the same way so chunk i goes to rank i.
        jobs_split = np.array_split(range(len(population)), size)
        population_split = [[population[job] for job in jobs]
                            for jobs in jobs_split]
        print('\t{} Core {} Distributing individuals'.format(get_time(), rank))
    else:
        population_split = None
        jobs_split = None
    if COMM is None:
        # Serial fallback: this process evaluates everything.
        population_mpi = population
        jobs_mpi = range(len(population))
    else:
        population_mpi = COMM.scatter(population_split, root = 0)
        jobs_mpi = COMM.scatter(jobs_split, root = 0)
    # Evaluate fitness for this rank's chunk, keyed by global index.
    fitnesses_mpi = {}
    for i, individual_mpi in zip(jobs_mpi, population_mpi):
        fitnesses_mpi[i] = toolbox.evaluate(individual_mpi)
    print('\t{} Core {} Finished evaluating individuals'.format(get_time(), rank))
    if COMM is None:
        fitnesses_list = [fitnesses_mpi]
    else:
        # BUG FIX: gather must use the same communicator that scattered the
        # work. The original called MPI.COMM_WORLD.gather here, which is
        # wrong whenever COMM is a sub-communicator of COMM_WORLD.
        fitnesses_list = COMM.gather(fitnesses_mpi, root = 0)
    if rank == 0:
        print('\t{} Core {} Assigning fitness to population.'.format(get_time(), rank))
        for fitnesses_dict in fitnesses_list:
            # .items() replaces py2-only .iteritems(); iteration is identical.
            for i, fitness in fitnesses_dict.items():
                population[i].fitness.values = fitness
    else:
        population = None
    return population
def generate_offspring(COMM = None, toolbox = None, population = None, cxpb = None):
    """Select and cross over the next generation on the root rank.

    Selection is delegated to toolbox.select; survivors are cloned so the
    parents are untouched, then adjacent clone pairs are mated with
    probability cxpb. Mated children lose their cached fitness so they get
    re-evaluated. Non-root ranks return None.
    """
    rank = get_rank(COMM)
    if rank != 0:
        return None
    print('\t{} Core {} Generating offspring'.format(get_time(), rank))
    print('# Offspring before selection: {}'.format(len(population)))
    selected = toolbox.select(population)
    print('# Offspring after selection: {}'.format(len(selected)))
    offspring = [toolbox.clone(survivor) for survivor in selected]
    # Pair up even/odd positions and apply crossover with probability cxpb.
    for left, right in zip(offspring[::2], offspring[1::2]):
        if random.random() < cxpb:
            toolbox.mate(left, right)
            del left.fitness.values
            del right.fitness.values
    return offspring
def print_generation_number(COMM = None, generation = None):
    """Log the current generation number from the root rank only."""
    rank = get_rank(COMM)
    if rank != 0:
        return
    print('{} Core {} Generation {}'.format(get_time(), rank, generation))
def make_next_population(COMM = None, population = None, offspring = None):
    """Replace the population's contents with the offspring, in place.

    Slice assignment keeps the original list object alive so any other
    references to it see the new generation.
    NOTE(review): the flattened source makes the indentation ambiguous; the
    in-place replacement is kept on the root rank only, matching the
    root-only pattern of the sibling functions — confirm against the
    original file.
    """
    rank = get_rank(COMM)
    if rank == 0:
        print('\t{} Core {} Generating new offspring'.format(get_time(), rank))
        population[:] = offspring
    return population
def run_cluster_expansion(train_path, clusters_path, configs_all_path, log_path,
                          submit_job=True, n_new=1, job_array=False):
    """Run one active-learning iteration of the cluster-expansion workflow.

    Reads clusters and configurations, fits a LASSO cluster-expansion model
    with leave-one-out CV, predicts energies for not-yet-computed
    configurations, submits up to n_new of the most promising ones as DFT
    jobs, and appends a summary line to the log file.

    train_path       -- directory of VASP training configurations (also used
                        as rel_path when submitting new jobs)
    clusters_path    -- Excel file describing the clusters
    configs_all_path -- Excel file listing every candidate configuration
    log_path         -- tab-separated log file appended to at the end
    submit_job       -- actually submit jobs (passed through to the runner)
    n_new            -- stop after this many successful submissions
    job_array        -- submit as a job array (passed through to the runner)
    """
    # Read cluster data
    print('Reading cluster data')
    clusters = Clusters.from_excel(clusters_path)

    # Read training structures
    print('Reading configuration training data')
    configs_train = Configurations.from_vasp(train_path)
    configs_train.calc_E_fit()

    # Read all candidate structures and keep only the untrained ones
    print('Reading all configuration data')
    configs_all = Configurations.from_excel(configs_all_path)
    print('Finding difference')
    configs_new = get_difference(configs_all, configs_train)

    # Correlation matrices for the model fit and for prediction
    print('Generating correlation matrix for training structures')
    pi_train = get_correlation_matrix(configurations=configs_train, clusters=clusters)
    print('Generating correlation matrix for new structures')
    pi_new = get_correlation_matrix(configurations=configs_new, clusters=clusters)

    # Similarity of each candidate to the existing training set
    print('Calculating similarity of new configurations to training configurations')
    configs_difference = get_configuration_difference(pi_train, pi_new)

    # Fit LASSO with leave-one-out CV (cv = number of training configs).
    # BUG FIX: the original try/except wrapped only the constructor with a
    # bare `except:` and then used `clf` unconditionally, so the fallback
    # path left `clf`/`Js`/`CE_E_new` undefined and the log write below
    # raised NameError. The whole fit now lives in the try, and the except
    # defines every value the rest of the function needs.
    # NOTE(review): reconstructed from a whitespace-mangled source with a
    # dangling `else:` — confirm this fallback structure against the
    # original file.
    print('Running Lasso with Leave-One-Out Cross Validation')
    try:
        clf = LassoCV(copy_X=True, cv=len(configs_train), fit_intercept=True)
        clf.fit(pi_train, configs_train.get_E_fit())
        # Model data
        Js = clf.coef_
        intercept = clf.intercept_
        # Predicted cluster-expansion energies for the candidates
        print('Calculating energies using Cluster Expansion')
        CE_E_new = get_energies(correlation_mat=pi_new, Js=Js, intercept=intercept)
        cv = np.average(clf.mse_path_[-1])
        log_alpha = clf.alpha_
        log_mse = np.average(clf.mse_path_[-1])
    except Exception:
        print(configs_train.get_E_fit())
        # Fallback: no usable model — neutral cv and zero predicted energies.
        cv = 1.
        CE_E_new = np.zeros(len(configs_difference))
        Js = np.zeros(0)
        log_alpha = float('nan')
        log_mse = float('nan')

    # Submit DFT calculations for the best candidates, up to n_new successes.
    j = 0
    new_structures = []
    new_indices = []
    for n in range(len(configs_new)):
        new_index = get_best_structure(CE_E_new, configs_difference, cv=cv,
                                       cv_limit=0.0025, n=n)
        print('Attempting to submit {}'.format(configs_new[new_index].name))
        successful_submit = run_In2O3_configuration(configs_new[new_index],
                                                    rel_path=train_path,
                                                    submit_job=submit_job,
                                                    job_array=job_array)
        if successful_submit:
            j = j + 1
            new_structures.append(configs_new[new_index].name)
            new_indices.append(new_index)
        else:
            print('Failed to submit {}'.format(configs_new[new_index].name))
        if j >= n_new:
            break
    else:
        # Loop exhausted without reaching n_new submissions.
        # BUG FIX: only report failure when nothing at all was submitted;
        # the original unconditionally overwrote any partial successes.
        if not new_structures:
            print('Could not find structure to submit.')
            new_structures = ['Nan']

    print('Updating log file, {}'.format(log_path))
    with open(log_path, 'a') as log_ptr:
        log_ptr.write('{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
            get_time(),
            log_alpha,
            log_mse,
            count_nonsparse(Js=Js),
            '\t'.join([new_structure for new_structure in new_structures]),
            '\t'.join([
                str(configs_difference[new_index]) for new_index in new_indices
            ]),
            '\t'.join([str(CE_E_new[new_index]) for new_index in new_indices])))