Example #1
from math import sqrt

def err_func(outs, inps, dims, gene, nr_of_pars, op_table):
	"""
	Example of an error function, without parameters or anything fancy.
	Just a minimization of the L2-norm, i.e. the root-mean-square error.
	"""
	# The dimensionality is recomputed from the input data, so the dims argument
	# is effectively ignored (as is nr_of_pars in this simple example).
	dims = len(inps[0])
	cgp = CGP(dims, op_table, gene)
	n = len(inps)
	assert len(outs) == n

	# Accumulate squared residuals and return the RMS error over all samples.
	s = 0.0
	for i in range(n):
		tmp = outs[i] - cgp.eval(inps[i])
		s += tmp*tmp
	s /= float(n)
	return sqrt(s)
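
A minimal usage sketch of err_func (not part of the original example): the op_table entries are borrowed from Example #2 below, the toy data is made up, and create_random_gene, CGP and Operation are assumed to be the same library helpers used in the other examples on this page.

from random import random

# Hypothetical usage sketch: evaluate err_func for a random gene on toy data.
op_table = [Operation("log"), Operation("/")]   # illustrative operator table

dims = 2
inps = [[random()*5.0 + 0.1 for _ in range(dims)] for _ in range(100)]
outs = [x[0]/x[1] for x in inps]                # toy target: a simple quotient

gene = create_random_gene(dims, len(op_table), 10)
print("RMS error of a random gene:", err_func(outs, inps, dims, gene, 0, op_table))
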
Example #2
    Operation("log"),
    Operation("/")
]

# Step size for the finite-difference check below.
h = 1.0e-9

err = 0.0
counter = 0.0
for _ in range(10):
    # Build a CGP instance from a random gene with random size and dimensionality.
    nr_of_nodes = randint(1, 15)
    dims = randint(1, 4)
    gene = create_random_gene(dims, len(op_table), nr_of_nodes)

    cgp = CGP(dims, op_table, gene, nr_of_parameters=0)

    for _ in range(10):
        pnt = [gauss(0, 10) for _ in range(dims)]

        for d in range(dims):
            # Forward-difference approximation of the derivative in direction d,
            # compared against the analytical derivative returned by eval().
            pnt_shift = list(pnt)
            pnt_shift[d] += h
            numerical_der = (cgp.eval(pnt_shift) - cgp.eval(pnt)) / h
            analytical_der = cgp.eval(pnt, derivative=True, der_dir=d)
            diff = analytical_der[1] - numerical_der

            err += diff * diff
            counter += 1.0

# Root-mean-square difference between analytical and numerical derivatives.
err = sqrt(err / counter)
print("Test 2 error:", err)

print("Both errors should be below 1.0e-5")
def starting_point_approximation(func, nr_of_parameters, parameter_ranges, optimizer,
		max_iter=1000, multi_starts=2, nr_of_samples_per_parameter=25,
		nr_of_parameters_in_cgp=3, max_time=None, symbolic_der=None):

	assert nr_of_samples_per_parameter > 1
	assert nr_of_parameters >= 0
	if nr_of_parameters == 0:
		# With no parameters the function has a single, fixed root, so there is
		# nothing to approximate and no need for a starting-point function.
		print("There are no parameters in the input function, so there is nothing to approximate.")
		assert False
	else:

		# Make sure that the input data even makes sense.
		assert len(parameter_ranges) == nr_of_parameters
		for tmp in parameter_ranges:
			assert len(tmp) == 2
			assert tmp[0] < tmp[1]

		# Calculate the total number of samples.
		nr_of_samples = 1
		for _ in range(nr_of_parameters):
			nr_of_samples *= nr_of_samples_per_parameter

		# Generate random parameter points in the given ranges.
		# TODO: random sampling is a quick placeholder; sampling on a regular
		# Cartesian grid would be preferable.
		parameter_samples = [[0.0 for _ in range(nr_of_parameters)] for _ in range(nr_of_samples)]
		for i in range(nr_of_samples):
			for d in range(nr_of_parameters):
				parameter_samples[i][d] = random()*(parameter_ranges[d][1]-parameter_ranges[d][0])+parameter_ranges[d][0]


		# If no symbolic derivative is supplied, fall back to a forward-difference
		# approximation with a fixed step size.
		if symbolic_der is None:
			func_der = lambda x, a: (func([x[0]+1.0e-11], a) - func([x[0]], a))/1.0e-11
		else:
			func_der = symbolic_der
		# Step 1
		# For each parameter point, find the x value that is a (or the) root of func,
		# using Newton-Raphson (a minimal sketch of such a root finder is given
		# after this function).
		root_samples_and_errors = [root_finders(func, func_der, parameter_samples[i]) for i in range(nr_of_samples)]
		
		# Remove all samples for which the root finder did not converge.
		converge_thresh = 1.0e-8
		counter = 0
		for _ in range(nr_of_samples):
			err = root_samples_and_errors[counter][1]
			if err > converge_thresh:
				# Drop this sample; counter stays put because the lists shrink.
				tmp = root_samples_and_errors.pop(counter)
				parameter_samples.pop(counter)
				assert tmp[1] > converge_thresh
				counter -= 1
			counter += 1

		root_samples = [tmp[0] for tmp in root_samples_and_errors]
		errors_samples = [tmp[1] for tmp in root_samples_and_errors]

		assert max(errors_samples) <= converge_thresh

		# Fraction of samples that were filtered out because the root finder failed.
		filtered_quota = 1.0 - len(root_samples)/float(nr_of_samples)
		print("How many were filtered:", 100.0*filtered_quota, "%")

		if filtered_quota > 0.05:
			# TODO: handle the case where more than 5% of the samples were filtered out.
			assert False

		# Step 2
		# Run a symbolic regression to find a good approximation for the root.
		# This is used as a starting point.
		(cgp, best_err, parameters) = starting_point_approximation_symbolic_regression(root_samples, parameter_samples, optimizer, max_iter=max_iter, nr_of_parameters=nr_of_parameters_in_cgp, max_time=max_time)

		# Step 2 and a half
		# The symbolic regression tends to ignore constant solutions, so the best
		# constant fit (the mean of the root samples) is checked separately.
		mean = sum(root_samples)/float(len(root_samples))
		error_from_mean = sqrt(sum((r-mean)*(r-mean) for r in root_samples) / float(len(root_samples)))
		if error_from_mean < best_err:
			print("Using a constant approximation instead, RMS error:", error_from_mean)
			# Create a new gene that represents a constant function: the output
			# points at the first parameter node (index cgp.dims), which holds the constant.
			new_gene = [0] * len(cgp.gene)
			new_gene[-1] = cgp.dims

			cgp = CGP(cgp.dims, cgp.op_table, new_gene, nr_of_parameters=1)
			parameters = [mean]

			# Sanity check: the constant CGP should return exactly the mean everywhere.
			for _ in range(10):
				assert cgp.eval([random() for _ in range(cgp.dims)], parameters=parameters) == mean
			best_err = error_from_mean

		return (cgp, best_err, parameters)
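
The root_finders helper used in Step 1 above is not part of this excerpt. A minimal Newton-Raphson sketch that matches how it is called here (it receives the function, its derivative and one parameter sample, and returns a (root, residual_error) pair) could look like the following; the starting guess, iteration limit and tolerance are arbitrary assumptions.

def root_finders(func, func_der, parameters, x0=1.0, max_iter=100, tol=1.0e-12):
	"""
	Hypothetical sketch of the root finder assumed by Step 1: plain Newton-Raphson
	on x -> func([x], parameters). Returns (root, abs(func([root], parameters))),
	so the second element can be compared against converge_thresh.
	"""
	x = x0
	for _ in range(max_iter):
		f = func([x], parameters)
		df = func_der([x], parameters)
		if df == 0.0:
			break                      # derivative vanished; give up at the current x
		x_new = x - f/df
		if abs(x_new - x) < tol:
			x = x_new
			break
		x = x_new
	return (x, abs(func([x], parameters)))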