def build_neural_network(
    network_input_perceptrons_indexes: List[int],
    network_output_perceptrons_indexes: List[int],
    perceptrons_details: List[Tuple[Callable[[float], float], List[int], List[float]]],
) -> Network:
    """Takes the output of read as input and builds a Network object with it."""
    perceptron_nbr = len(perceptrons_details)
    network_input_perceptrons_indexes = [index % perceptron_nbr for index in network_input_perceptrons_indexes]
    network_output_perceptrons_indexes = [index % perceptron_nbr for index in network_output_perceptrons_indexes]
    perceptrons = [
        (NetworkInput if index in network_input_perceptrons_indexes else Perceptron)(activation_function, [], [])
        for index, (activation_function, _, _) in enumerate(perceptrons_details)
    ]
    for (_, input_perceptrons_indexes, weights), perceptron in zip(perceptrons_details, perceptrons):
        for index, weight in zip(input_perceptrons_indexes, weights):
            perceptron.add_as_input(perceptrons[index % len(perceptrons)], weight)
    network_input_perceptrons, network_output_perceptrons, hidden_perceptrons = [], [], []
    for index, perceptron in enumerate(perceptrons):
        index = index % perceptron_nbr
        if index not in network_input_perceptrons_indexes and index not in network_output_perceptrons_indexes:
            hidden_perceptrons.append(perceptron)
    for index in network_input_perceptrons_indexes:
        network_input_perceptrons.append(perceptrons[index % perceptron_nbr])
    for index in network_output_perceptrons_indexes:
        network_output_perceptrons.append(perceptrons[index % perceptron_nbr])
    return Network(network_input_perceptrons, hidden_perceptrons, network_output_perceptrons)
def train_network():
    # prepare the network
    net = Network([4, 4, 3], 0.02)
    # store the generated net data for comparison
    net_data = net.export_data()

    def train(activation_f):
        net.import_data(net_data)  # restore original for comparison purposes
        net.set_activation_f(activation_f)
        errors_data = []
        for i, error in enumerate(net.teach_loop(samples), start=1):
            errors_data.append(error)
            if i >= cycles:
                break
        return errors_data

    funs_data = []
    for f in funcs:
        print(f.__name__)
        funs_data.append(train(f))
    return funs_data
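# A minimal plotting sketch for the per-activation error curves returned by train_network()
# (assumptions: matplotlib is available, and `funcs`, `samples`, and `cycles` are the same
# module-level names the snippet above already relies on):
import matplotlib.pyplot as plt

curves = train_network()
for f, errors in zip(funcs, curves):
    plt.plot(range(1, len(errors) + 1), errors, label=f.__name__)
plt.xlabel('training cycle')
plt.ylabel('network error')
plt.legend()
plt.show()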
def main():
    size_of_learn_sample = int(len(x) * 0.9)
    print(size_of_learn_sample)

    nn = Network(x, y, 0.5)
    nn.train()
    nn.printer()
def _random_genes(self):
    net = Network(2, 1)
    net.add_neuron(connections=['in0', 'in1'],
                   weights=[self._random_weight(), self._random_weight()],
                   bias=self._random_weight(),
                   activation='sigmoid')
    net.add_neuron(connections=['in0', 'in1'],
                   weights=[self._random_weight(), self._random_weight()],
                   bias=self._random_weight(),
                   activation='sigmoid')
    net.neurons['out0'].connections = ['h0', 'h1']
    net.neurons['out0'].weights = [self._random_weight(), self._random_weight()]
    net.neurons['out0'].neuron_type = 'feed_forward_neuron'
    net.neurons['out0'].bias = self._random_weight()
    net.neurons['out0'].activation = 'sigmoid'
    return net
from data_loader import DataLoader
from neural_network import Network
from training_parameters import LAYER_SIZES, MINI_BATCH_SIZE, EPOCHS, LEARNING_RATE
from config import (VERBOSE, TRAIN_IMAGE_DIR, TRAIN_LABEL_DIR, TRAINING_DATA_SKIP_FOOTER,
                    TEST_IMAGE_DIR, TEST_LABEL_DIR, TEST_DATA_SKIP_FOOTER, TEST_PREDICTIONS_FILENAME)

if VERBOSE:
    print('loading training data...')
training_data = DataLoader.get_training_data(TRAIN_IMAGE_DIR, TRAIN_LABEL_DIR, TRAINING_DATA_SKIP_FOOTER)

if VERBOSE:
    print('loading test data...')
test_data = DataLoader.get_test_data(TEST_IMAGE_DIR, TEST_LABEL_DIR, TEST_DATA_SKIP_FOOTER)

if VERBOSE:
    print('begin training...')
neural_network = Network(LAYER_SIZES)
neural_network.train(training_data, test_data, MINI_BATCH_SIZE, EPOCHS, LEARNING_RATE, VERBOSE, TEST_PREDICTIONS_FILENAME)
        print('Episode:{0}\tScore:{1}'.format(episode_ct, myscore))

    # Close the window
    if window._open:
        window.close()

    return total_score / N


if __name__ == '__main__':
    # Load data
    X, y = load_data('./data/expert_q.txt')

    # Initialize the model
    nn = Network(hidden_layers=(256, 256, 256, 256), lr=9e-2, epoch=600, bias=True, batch_size=64)
    try:
        nn.load_weights()
    except Exception:
        print('WARNING: No pre-trained networks!')

    # Training
    nn.train(X, y)
    nn.save_weights()

    # Display accuracy
    accuracy(nn, X, y)
    print()

    # Simulate 1,000 games
from neural_network import Network

network = Network(training_iteration=500000, learning_rate=0.3, error_threshold=0.0001)
network.add_layer(3, 2)
network.add_layer(1)

network.train([
    [[0, 0], [0]],
    [[0, 1], [1]],
    [[1, 0], [1]],
    [[1, 1], [1]],
])

output = network.process([0, 0])
print('0 OR 0 = {}'.format(output))

output = network.process([0, 1])
print('0 OR 1 = {}'.format(output))

output = network.process([1, 0])
print('1 OR 0 = {}'.format(output))

output = network.process([1, 1])
print('1 OR 1 = {}'.format(output))
def main():
    net = Network(2, 1)
    net.add_neuron(connections=['in0', 'in1'], weights=[+20, +20], bias=-30, activation='sigmoid')
    net.add_neuron(connections=['in0', 'in1'], weights=[-20, -20], bias=+10, activation='sigmoid')
    net.neurons['out0'].connections = ['h0', 'h1']
    net.neurons['out0'].weights = [+20, +20]
    net.neurons['out0'].neuron_type = 'feed_forward_neuron'
    net.neurons['out0'].bias = -10
    net.neurons['out0'].activation = 'sigmoid'
    net.save_network_to_file('truth.network.json')

    print('Truth network (#%s):' % net.name)
    print('%0.4f' % net.fitness(dataset))
    test_sample(net, [0, 0])
    test_sample(net, [0, 1])
    test_sample(net, [1, 0])
    test_sample(net, [1, 1])
    print('------------------------------------')

    try:
        net = Network()
        net.load_network_from_file('result.network.json')
        print('result network (#%s):' % net.name)
        print('%0.4f' % net.fitness(dataset))
        test_sample(net, [0, 0])
        test_sample(net, [0, 1])
        test_sample(net, [1, 0])
        test_sample(net, [1, 1])
        print('------------------------------------')
    except IOError:
        pass
from data.mnist_loader import load_data_wrapper
from neural_network import Network
import time

# load_data_wrapper has a hardcoded reference to the mnist data file.
# If you move this file you should update this reference.
training_data, validation_input, test_data = load_data_wrapper()

# Hyperparameters
layer_1_hidden_neurons = 784
layer_2_hidden_neurons = 30
layer_3_hidden_neurons = 10
layers = [layer_1_hidden_neurons, layer_2_hidden_neurons, layer_3_hidden_neurons]
number_of_epochs = 30
mini_batch_size = 10
learning_rate = 3.0

net = Network(layers)

start_time = time.time()
net.stochasticGradientDescent(
    training_data,
    number_of_epochs,
    mini_batch_size,
    learning_rate,
    test_data=test_data)
end_time = time.time()

duration = end_time - start_time
print(duration)
class Environment(object):
    """
    Handles the interactions with the circuit model created in OpenDSS.

    Arguments:
        None

    Attributes:
        _model (OpenDssEngine): OpenDSS engine used for the simulation.
        _config (Configuration): Global project configuration.
        _max_episodes (int): Maximum number of training episodes.
        _steps_per_episode (int): Length of each episode.
        _actions_per_step (int): How many actions can be taken in each step.
        _possible_actions (list(Tuple | None)): Actions the agent can take on the system.
        _possible_actions_map (dict): Mapping from actions to codes.
        _weekdays_map (dict): Mapping from weekdays to numbers.
        _voltage_targets (dict): Voltage targets for each bus.
        _current_episode (int): Current episode.
        _current_step (int): Step within the current episode.
        _actions_taken (int): How many actions have been taken in the current step.
        _training_steps (int): Total number of steps; used to start the network's memory replay.
        _agent (Agent): Agent that decides which actions to take.
        _online_network (Network): Online neural network.
        _target_network (Network): Offline (target) neural network.

    Methods:
        train():
            Runs the agent's training process.
        run():
            Starts the execution of the algorithm.
        plot_run_results(by_bus_controlled_voltage, by_bus_normal_voltage, controlled_average_voltage,
                         normal_average_voltage, naive_average_voltage, naive_by_bus_voltage):
            Plots the results of the run.
        _check_action_undone(last_action, action):
            Checks whether an action taken in one step of the algorithm undoes the one taken in the previous step.
        get_voltage_targets():
            Reads the voltage targets for each bus and each minute of the day from a file.
        calculate_reward(previous_state, current_state, action, last_action):
            Computes the reward received by the agent based on the action taken and the resulting state change.
        get_epsilon():
            Returns the epsilon parameter to be used in the current step.
        get_base_learning_rate():
            Returns the learning rate to be used by the neural network.
        get_discount_factor():
            Returns the discount factor to be used in reinforcement learning.
        sync_networks():
            Synchronizes the online and offline networks by copying the weights from one to the other.
        scaler_partial_fit(inputs):
            Partially fits the scaler used at the input of the neural networks with the data available so far.
        _train_network():
            Trains the online network on a random sample from the agent's memory.
    """

    def __init__(self):
        self._model = OpenDssEngine()
        self._config = Configuration()
        self._model.start()
        self._max_episodes = 1000
        self._steps_per_episode = self._model.get_episode_length()
        self._actions_per_step = self._model.get_actions_per_step()
        self._possible_actions = self._model.get_possible_actions() + [None]
        self._possible_actions_map = dict({(i, action) for i, action in enumerate(self._possible_actions)})
        self._weekdays_map = {'MON': 1, 'TUE': 2, 'WED': 3, 'THU': 4, 'FRI': 5, 'SAT': 6, 'SUN': 7}
        self._voltage_targets = None
        self._current_episode = 1
        self._current_step = 1
        self._actions_taken = 0
        self._training_steps = 0
        self._agent = Agent(self._possible_actions_map)
        self._online_network = Network(
            len(self._model.get_state().state_space_repr(
                self._current_step, self._actions_taken,
                self._weekdays_map[self._model.get_weekday()])),
            len(self._possible_actions),
            self._model.get_file_name())
        self._target_network = Network(
            len(self._model.get_state().state_space_repr(
                self._current_step, self._actions_taken,
                self._weekdays_map[self._model.get_weekday()])),
            len(self._possible_actions),
            self._model.get_file_name())
        self.sync_networks()

    def train(self, plot=True):
        """
        Runs the agent's training process. At every simulation step the agent makes a decision.
        The networks are trained according to the scheme proposed by the algorithm.

        Parameters:
            plot (bool): Whether the accumulated network loss is plotted after training.

        Raises:
            None

        Returns:
            Average loss of the online network.
        """
        self._current_episode = 1
        self._current_step = 1
        self._actions_taken = 0
        self._training_steps = 0
        self._agent.reset()
        start_state = deepcopy(self._model.get_state())
        self._online_network._scaler.partial_fit(
            np.array(self._model.get_state().state_space_repr(
                self._current_step, self._actions_taken,
                self._weekdays_map[self._model.get_weekday()])).reshape(1, -1))
        self._target_network._scaler.partial_fit(
            np.array(self._model.get_state().state_space_repr(
                self._current_step, self._actions_taken,
                self._weekdays_map[self._model.get_weekday()])).reshape(1, -1))
        pbar = tqdm(total=self._max_episodes, desc='Episode: ', position=0, leave=True)
        while (self._current_episode <= self._max_episodes):
            self._model.start(False)
            self._current_step = 1
            self.get_voltage_targets()
            self._model.set_state(deepcopy(start_state))
            while (self._current_step <= self._steps_per_episode):
                self._model.update_loads(self._current_step)
                self._actions_taken = 0
                while (self._actions_taken < self._actions_per_step):
                    self._agent.take_action(
                        self._model, self, True, self._online_network,
                        self._current_step, self._actions_taken)
                    self._actions_taken += 1
                    self._training_steps += 1
                    if self._training_steps > self._config.replay_start:
                        self._train_network()
                    if self._training_steps % self._config.target_update_frequency == 0:
                        self.sync_networks()
                self._current_step += 1
            self._current_episode += 1
            pbar.update(1)
        self.sync_networks()
        self._target_network.dump_weights()
        self._target_network.dump_scaler()
        self._model.set_state(deepcopy(start_state))

        # Plot the network loss after training
        loss = self._online_network.get_loss()
        if plot:
            plt.plot([x for x in range(len(loss))], loss)
            plt.show()
        return sum(loss) / len(loss)

    def run(self):
        """
        Starts the execution of the algorithm. Three runs are made with the system in the same
        initial condition:
            -> The RL agent acts on the system.
            -> The naive agent acts on the system.
            -> The system runs with no external action, with or without the automatic regulators
               acting (parameter of the start function).
        Parameters:
            None

        Raises:
            None

        Returns:
            Average and per-bus voltages, controlled and uncontrolled.
        """
        self._model.start(False)
        normal_average_voltage = []
        controlled_average_voltage = []
        by_bus_normal_voltage = []
        by_bus_controlled_voltage = []
        start_state = deepcopy(self._model.get_state())
        load_profile = deepcopy(self._model.get_load_profile())
        self._current_step = 1
        self._agent.reset()
        self._actions_taken = 0
        self._model.set_state(deepcopy(start_state))
        self._model.update_load_profile(load_profile)
        pbar = tqdm(total=self._steps_per_episode, desc='Smart Run Step: ', position=0, leave=True)
        while (self._current_step <= self._steps_per_episode):
            self._actions_taken = 0
            while (self._actions_taken < self._actions_per_step):
                self._model.update_loads(self._current_step)
                self._agent.take_action(
                    self._model, self, False, self._target_network,
                    self._current_step, self._actions_taken)
                controlled_average_voltage.append(
                    self._model.evaluate_voltages())
                by_bus_controlled_voltage.append(self._model.get_voltages())
                self._actions_taken += 1
            self._current_step += 1
            pbar.update(1)
        print(self._agent._taken_actions)

        self._model.start(False)
        self._current_step = 1
        self._agent.reset()
        self._actions_taken = 0
        self._model.set_state(deepcopy(start_state))
        self._model.update_load_profile(load_profile)
        na = NaiveAgent()
        naive_average_voltages, naive_by_bus_voltages = na.run()

        self._model.start(True)
        self._current_step = 1
        self._agent.reset()
        self._actions_taken = 0
        self._model.set_state(deepcopy(start_state))
        self._model.update_load_profile(load_profile)
        pbar = tqdm(total=self._steps_per_episode, desc='No-Action Run Step: ', position=0, leave=True)
        while (self._current_step <= self._steps_per_episode):
            self._actions_taken = 0
            while (self._actions_taken < self._actions_per_step):
                self._model.update_loads(self._current_step)
                normal_average_voltage.append(
                    self._model.evaluate_voltages())
                by_bus_normal_voltage.append(self._model.get_voltages())
                self._actions_taken += 1
            self._current_step += 1
            pbar.update(1)

        self.plot_run_results(by_bus_controlled_voltage, by_bus_normal_voltage,
                              controlled_average_voltage, normal_average_voltage,
                              naive_average_voltages, naive_by_bus_voltages)
        return (controlled_average_voltage, normal_average_voltage,
                by_bus_controlled_voltage, by_bus_normal_voltage)

    def plot_run_results(self, by_bus_controlled_voltage, by_bus_normal_voltage,
                         controlled_average_voltage, normal_average_voltage,
                         naive_average_voltage, naive_by_bus_voltage):
        """
        Plots the results of the run.

        Parameters:
            by_bus_controlled_voltage (list): Per-bus voltages controlled by the RL agent.
            by_bus_normal_voltage (list): Uncontrolled per-bus voltages.
            controlled_average_voltage (list): Average voltage controlled by the RL agent.
            normal_average_voltage (list): Uncontrolled average voltage.
            naive_average_voltage (list): Average voltage controlled by the naive agent.
            naive_by_bus_voltage (list): Per-bus voltages controlled by the naive agent.
        Raises:
            None

        Returns:
            None
        """
        smart_buses = {}
        base_buses = {}
        naive_buses = {}
        for smart_step, step, naive_step in zip(by_bus_controlled_voltage,
                                                by_bus_normal_voltage,
                                                naive_by_bus_voltage):
            for smart_bus, base_bus, naive_bus in zip(smart_step, step, naive_step):
                if smart_bus['bus'] in smart_buses:
                    smart_buses[smart_bus['bus']].append(mean(smart_bus['voltages']))
                else:
                    smart_buses[smart_bus['bus']] = [mean(smart_bus['voltages'])]
                if base_bus['bus'] in base_buses:
                    base_buses[base_bus['bus']].append(mean(base_bus['voltages']))
                else:
                    base_buses[base_bus['bus']] = [mean(base_bus['voltages'])]
                if naive_bus['bus'] in naive_buses:
                    naive_buses[naive_bus['bus']].append(mean(naive_bus['voltages']))
                else:
                    naive_buses[naive_bus['bus']] = [mean(naive_bus['voltages'])]

        fig10, ax10 = plt.subplots()
        ax10.set_title("Controlled Voltage (Reinforcement Learning) per Bus")
        for k, v in smart_buses.items():
            ax10.plot([x for x in range(len(v))], v)
        colormap = plt.cm.nipy_spectral
        colors = [colormap(i) for i in np.linspace(0, 1, len(ax10.lines))]
        for i, j in enumerate(ax10.lines):
            j.set_color(colors[i])

        fig11, ax11 = plt.subplots()
        ax11.set_title("Normal Voltage per Bus")
        for k, v in base_buses.items():
            ax11.plot([x for x in range(len(v))], v)
        for i, j in enumerate(ax11.lines):
            j.set_color(colors[i])

        fig12, ax12 = plt.subplots()
        ax12.set_title("Controlled Voltage (Naive) per Bus")
        for k, v in naive_buses.items():
            ax12.plot([x for x in range(len(v))], v)
        for i, j in enumerate(ax12.lines):
            j.set_color(colors[i])

        fig, ax1 = plt.subplots()
        rms_normal = sqrt(mean_squared_error([1 for x in range(len(normal_average_voltage))], normal_average_voltage))
        rms_rl = sqrt(mean_squared_error([1 for x in range(len(controlled_average_voltage))], controlled_average_voltage))
        rms_naive = sqrt(mean_squared_error([1 for x in range(len(naive_average_voltage))], naive_average_voltage))
        p1, = ax1.plot([x for x in range(len(controlled_average_voltage))], controlled_average_voltage,
                       color='blue', label=f"Reinforcement Learning (RMSE: {round(rms_rl, 4)})")
        p2, = ax1.plot([x for x in range(len(normal_average_voltage))], normal_average_voltage,
                       color='red', label=f"Original Voltage (RMSE: {round(rms_normal, 4)})")
        p3, = ax1.plot([x for x in range(len(naive_average_voltage))], naive_average_voltage,
                       color='green', label=f"Naive Agent (RMSE: {round(rms_naive, 4)})")
        lines = [p1, p2, p3]
        ax1.legend(lines, [l.get_label() for l in lines])
        ax1.axhspan(ymax=1.005, ymin=0.995, color='yellow', alpha=0.15)
        plt.title("Average System Voltage")
        plt.ylabel('Voltage (p.u.)')
        plt.xlabel('Minutes')
        plt.grid(linestyle='dashed')

        data_control = []
        for s in by_bus_controlled_voltage:
            for b in s:
                data_control.append(mean(b['voltages']))
        data_normal = []
        for s in by_bus_normal_voltage:
            for b in s:
                data_normal.append(mean(b['voltages']))
        data_naive = []
        for s in naive_by_bus_voltage:
            for b in s:
                data_naive.append(mean(b['voltages']))
        fig1, ax1 = plt.subplots()
        ax1.set_title('Distribution of System Voltages')
        ax1.boxplot([data_control, data_naive, data_normal])
        plt.xticks([1, 2, 3], ['Controlled Voltage (RL)', 'Controlled Voltage (Naive)', 'Normal Voltage'])
        plt.show()

    def _check_action_undone(self, last_action, action):
        """
        Checks whether an action taken in one step of the algorithm undoes the one taken in the
        previous step.

        Parameters:
            last_action (tuple): Action taken in the previous step.
            action (tuple): Action taken in the current step.

        Raises:
            None

        Returns:
            True or False accordingly.
        """
        if action and last_action:
            if (last_action[0] != action[0]):
                return False
            if (last_action[1] != action[1]):
                return False
            # Capacitor
            if (action[0] == self._model.switch_capacitor.__name__):
                if ((action[2] == CapacitorCodes.Action.StepUp and last_action[2] == CapacitorCodes.Action.StepDown)
                        or (action[2] == CapacitorCodes.Action.StepDown and last_action[2] == CapacitorCodes.Action.StepUp)):
                    return True
                else:
                    return False
            # Transformer
            elif (action[0] == self._model.change_tap.__name__):
                if ((action[2] == TransformerCodes.Action.TapUp and last_action[2] == TransformerCodes.Action.TapDown)
                        or (action[2] == TransformerCodes.Action.TapDown and last_action[2] == TransformerCodes.Action.TapUp)):
                    return True
                else:
                    return False
            else:
                return False
        else:
            return False

    def get_voltage_targets(self):
        """
        Reads the voltage targets for each bus and each minute of the day from a file.
        The file has the format 'bus';'minute';'target'. If the file does not exist, or there is
        no target for a given bus/minute, the default target (1 p.u.) is used.

        Parameters:
            None

        Raises:
            None

        Returns:
            Voltage target or None.
        """
        weekday = self._model.get_weekday()
        if path.isfile(path.join(basepath, "voltage_targets", weekday + ".csv")):
            self._targets = pd.read_csv(path.join(basepath, "voltage_targets", weekday + ".csv"), sep=";")
            self._targets.set_index(['bus', 'minute'], inplace=True)
        else:
            self._targets = None

    def calculate_reward(self, previous_state, current_state, action, last_action):
        """
        Computes the reward received by the agent based on the action taken and the resulting
        state change.

        Parameters:
            previous_state (dict): Dictionary with the voltage at each bus in the previous state.
            current_state (dict): Dictionary with the voltage at each bus in the current state.
            action (tuple): Action taken in the current state.
            last_action (tuple): Action taken in the previous state.

        Raises:
            None

        Returns:
            Reward
        """
        reward = 0
        v_target = None
        for previous_voltage, current_voltage in zip(previous_state, current_state):
            if self._targets is not None:
                bus = current_voltage['bus']
                query = f'bus == "{bus}" and minute == {self._current_step}'
                target = self._targets.query(query)['target']
                if not target.empty:
                    v_target = float(target)
                else:
                    v_target = self._config.target_voltage
            else:
                v_target = self._config.target_voltage
            v_before = round(mean(previous_voltage['voltages']), 3)
            v_after = round(mean(current_voltage['voltages']), 3)
            if (abs(v_target - v_before) < abs(v_target - v_after)):
                reward += self._config.away_target_penalty_over
                if (self._config.lower_voltage_limit * v_target > v_after) or (v_after > self._config.upper_voltage_limit * v_target):
                    reward += self._config.out_of_limits_penalty
            elif (abs(v_target - v_before) > abs(v_target - v_after)):
                reward += self._config.toward_target_reward
            else:
                if (self._config.lower_voltage_limit * v_target > v_after) or (v_after > self._config.upper_voltage_limit * v_target):
                    reward += self._config.out_of_limits_penalty
                    if not action:
                        reward += self._config.stand_still_reward_out_of_target
                    else:
                        reward += self._config.meaningless_action_penalty
        if self._check_action_undone(last_action, action):
            reward += self._config.action_undone_penalty
        return reward

    def get_epsilon(self):
        """
        Returns the epsilon parameter to be used in the current step.
        Parameters:
            None

        Raises:
            None

        Returns:
            Epsilon (between 0 and 1)
        """
        decay = ((self._max_episodes + 1 - self._current_episode) / self._max_episodes)
        return max(self._config.epsilon[0], self._config.epsilon[1] * decay)

    def get_base_learning_rate(self):
        """
        Returns the learning rate to be used by the neural network.

        Parameters:
            None

        Raises:
            None

        Returns:
            Learning rate.
        """
        return self._config.base_learning_rate

    def get_discount_factor(self):
        """
        Returns the discount factor to be used in reinforcement learning.

        Parameters:
            None

        Raises:
            None

        Returns:
            Discount factor.
        """
        return self._config.discount_factor

    def sync_networks(self):
        """
        Synchronizes the online and offline networks by copying the weights from one to the other.

        Parameters:
            None

        Raises:
            None

        Returns:
            None
        """
        self._target_network = deepcopy(self._online_network)

    def scaler_partial_fit(self, inputs):
        """
        Partially fits the scaler used at the input of the neural networks with the data
        available up to the current step.

        Parameters:
            inputs (np.array): Array with the inputs to be used.

        Raises:
            None

        Returns:
            None
        """
        self._online_network._scaler.partial_fit(inputs)
        self._target_network._scaler.partial_fit(inputs)

    def _train_network(self):
        """
        Trains the online network on a random sample from the agent's memory.

        Parameters:
            None

        Raises:
            None

        Returns:
            None
        """
        train_batch = self._agent.sample_memory()
        inputs = np.array([sample["state"] for sample in train_batch], dtype=np.float32)
        self.scaler_partial_fit(inputs)
        actions = np.array([sample["action"] for sample in train_batch])
        rewards = np.array([sample["reward"] for sample in train_batch])
        next_inputs = np.array([sample["next_state"] for sample in train_batch], dtype=np.float32)
        self.scaler_partial_fit(next_inputs)
        encoded_actions = np.eye(len(self._possible_actions))[actions]
        best_actions = np.argmax(np.squeeze(self._online_network.model(next_inputs)), axis=-1)
        best_actions_values = np.squeeze(self._target_network.model(next_inputs))
        next_q_values = best_actions_values[np.arange(len(best_actions_values)), best_actions]
        targets = rewards + self._config.discount_factor * next_q_values
        self._online_network.update_weights(inputs, targets, encoded_actions)
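# The last four lines of _train_network above implement the Double DQN target: the online
# network picks the greedy action for the next state, while the target network supplies its
# value. A minimal, self-contained NumPy sketch of that computation with toy numbers (not
# tied to the OpenDSS model above):
import numpy as np

rewards = np.array([1.0, -2.0])                   # one reward per sampled transition
q_online_next = np.array([[0.2, 0.9, 0.1],        # online-network Q-values for the next states
                          [0.5, 0.4, 0.3]])
q_target_next = np.array([[0.25, 0.80, 0.05],     # target-network Q-values for the next states
                          [0.55, 0.35, 0.20]])
discount_factor = 0.99

best_actions = np.argmax(q_online_next, axis=-1)                             # chosen by the online net
next_q_values = q_target_next[np.arange(len(q_target_next)), best_actions]   # evaluated by the target net
targets = rewards + discount_factor * next_q_values
print(targets)  # [1.0 + 0.99*0.80, -2.0 + 0.99*0.55] -> [1.792, -1.4555]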
class AutonomousSnake:
    directions = {0: np.array([0, -1]), 1: np.array([1, -1]), 2: np.array([1, 0]),
                  3: np.array([1, 1]), 4: np.array([0, 1]), 5: np.array([-1, 1]),
                  6: np.array([-1, 0]), 7: np.array([-1, -1])}

    def __init__(self, pos=[13, 13], length=5, base_max_moves=100, field_width=25, field_height=25):
        self.field_width = field_width
        self.field_height = field_height
        self.position = np.array([pos[0], pos[1]])
        self.velocity = self.directions[np.random.choice([0, 2, 4, 6])]
        self.length = length
        self.score = 0
        self.brain = Network([24, 18, 4])
        self.tail = np.empty(shape=[0, 2])
        for i in range(self.length - 1, 0, -1):
            self.tail = np.append(self.tail, [self.position - i * self.velocity], axis=0)
        self.alive = True
        self.time_alive = 0
        self.max_moves = base_max_moves
        self.moves_left = self.max_moves
        self.grow_count = 0
        self.lastMoveDir = np.array(self.velocity)
        self.food = self.place_food()
        self.vision = np.zeros(24)
        self.fitness = 0

    def think(self):
        self.see()
        decision = self.brain.feed_forward(self.vision.reshape((24, 1)))
        # decision_arg = np.argmax(decision)
        chosen_direction = self.directions[np.argmax(decision) * 2]
        if not np.array_equal(chosen_direction, -self.velocity):
            self.velocity = chosen_direction
        # self.velocity = self.directions[decision_arg * 2] if not np.array_equal(
        #     self.velocity, -self.directions[decision_arg * 2]) else self.velocity

    def move(self):
        if self.will_collide():
            self.alive = False
        if not self.alive:
            return
        self.tail = np.append(self.tail, [self.position], axis=0)
        self.position += self.velocity
        if self.grow_count == 0:
            self.tail = np.delete(self.tail, 0, 0)
        else:
            self.grow_count -= 1
            self.length += 1
        self.lastMoveDir = self.velocity
        if np.array_equal(self.position, self.food):
            self.eat()
        self.time_alive += 1
        self.moves_left -= 1
        if self.moves_left <= 0:
            self.alive = False

    def eat(self):
        self.food = self.place_food()
        self.grow_count += 1
        self.score += 1
        self.moves_left = self.max_moves

    def place_food(self):
        pos = np.array([np.random.randint(0, self.field_width), np.random.randint(0, self.field_height)])
        while self.occupied(pos):
            pos = np.array([np.random.randint(0, self.field_width), np.random.randint(0, self.field_height)])
        return pos

    def calc_fitness(self):
        if self.score < 10:
            self.fitness = math.floor(self.time_alive ** 2 * 2 ** self.score)
        else:
            self.fitness = self.time_alive ** 2 * 2 ** 10 * (self.score - 9)

    # Check if the position is occupied by the body of the snake
    def occupied(self, pos):
        for cell in self.tail:
            if np.array_equal(cell, pos):
                return True
        return np.array_equal(self.position, pos)

    def will_collide(self):
        next_position = self.position + self.velocity
        if self.is_on_tail(next_position):
            return True
        return (next_position[0] < 0 or next_position[0] >= self.field_width
                or next_position[1] < 0 or next_position[1] >= self.field_height)

    def is_on_tail(self, pos):
        for cell in self.tail:
            if np.array_equal(pos, cell):
                return True
        return False

    def see(self):
        self.vision = np.array([])
        for d in self.directions:
            d_vision = self.look_in_direction(self.directions[d])
            self.vision = np.append(self.vision, d_vision)

    def look_in_direction(self, direction):
        cur_pos = self.position + direction
        vision = np.zeros(3)
        food_found = False
        tail_found = False
        distance = 1.0
        while not (cur_pos[0] < 0 or cur_pos[0] >= self.field_width
                   or cur_pos[1] < 0 or cur_pos[1] >= self.field_height):
            if not food_found and np.array_equal(self.food, cur_pos):
                vision[0] = 1.0
                food_found = True
            if not tail_found and self.is_on_tail(cur_pos):
                vision[1] = 1.0 / distance
                tail_found = True
            cur_pos = cur_pos + direction
            distance += 1.0
        vision[2] = 1.0 / distance
        return vision

    def crossover_brain(self, other):
        new_snake = AutonomousSnake()
        if self.fitness > other.fitness:
            new_snake.brain = self.brain.crossover(other.brain)
        else:
            new_snake.brain = other.brain.crossover(self.brain)
        return new_snake

    def mutate(self, rate, mag):
        self.brain.mutate(rate, mag)

    def reincarnate(self):
        self.position = np.array((13, 13))
        self.velocity = self.directions[np.random.choice([0, 2, 4, 6])]
        self.length = 5
        self.alive = True
        self.time_alive = 0
        self.grow_count = 0
        self.tail = np.empty(shape=[0, 2])
        for i in range(self.length - 1, 0, -1):
            self.tail = np.append(self.tail, [self.position - i * self.velocity], axis=0)
        return self

    def set_training_config(self, config):
        self.max_moves = config.max_moves
        self.moves_left = self.max_moves

    def save(self, population, generation):
        brain_data = {"sizes": self.brain.sizes,
                      "weights": self.brain.weights,
                      "biases": self.brain.biases}
        data = {"generation": generation,
                # "id": self.sid,
                "fitness": self.fitness,
                "brain": brain_data}
        try:
            pickle_out = open("populations/pop{}/gen{}.pickle".format(population, generation), "wb")
        except Exception:
            os.mkdir("populations/pop{}".format(population))
            pickle_out = open("populations/pop{}/gen{}.pickle".format(population, generation), "wb")
        pickle.dump(data, pickle_out)
        pickle_out.close()

    @classmethod
    def load_snake(cls, filename):
        pickle_in = open(filename, 'rb')
        data = pickle.load(pickle_in)
        pickle_in.close()
        brain = Network(data["brain"]["sizes"])
        brain.weights = [np.array(w) for w in data["brain"]["weights"]]
        brain.biases = [np.array(b) for b in data["brain"]["biases"]]
        snake = AutonomousSnake()
        snake.fitness = data["fitness"]
        snake.brain = brain
        return snake

    def __copy__(self):
        snake_copy = AutonomousSnake()
        snake_copy.base_max_moves = self.max_moves
        snake_copy.brain = copy.copy(self.brain)
        return snake_copy
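# A small usage sketch for the persistence helpers above (assumptions: the "populations/"
# directory is writable, and the Network exposes sizes/weights/biases as save() expects):
snake = AutonomousSnake()
snake.calc_fitness()
snake.save(population=0, generation=1)   # writes populations/pop0/gen1.pickle
best = AutonomousSnake.load_snake("populations/pop0/gen1.pickle")
print(best.fitness, best.brain.sizes)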
# create a buffer for the history
history_buffer = HistoryBuffer(HISTORY_SIZE)

# inner activation functions of the network
if INNER_FUNC == 'relu':
    in_func = ReLU
    in_func_der = dReLU
elif INNER_FUNC == 'sigm':
    in_func = sigmoid
    in_func_der = sigmoid_prime

# Initialize the network that estimates the energy usefulness of an action in a given state
NN = Network(sizes=LAYERS, inner_function=in_func, inner_function_prime=in_func_der,
             output_function=lambda x: x, output_derivative=lambda x: 1,
             l1=L1, l2=L2, init_dist=W_B_INIT_DISTRIBUTION)

# start the environment
env = gym.make('MountainCarContinuous-v0')

# noise
noise = UONoise()

# video recording
if IS_VIDEO:
    env = wrappers.Monitor(env, "./video", force=True, video_callable=lambda episode_id: True)

# per-episode changes of weights and biases
network_changes = []
layers = [
    Conv((5, 5, 3, 8), strides=1, pad=2, activation=relu,
         filter_init=lambda shp: np.random.normal(size=shp) * 1.0 / (5 * 5 * 3)),
    MaxPool(f=8, strides=8, channels=8),
    Conv((3, 3, 8, 16), strides=1, pad=1, activation=relu,
         filter_init=lambda shp: np.random.normal(size=shp) * 1.0 / (3 * 3 * 8)),
    MaxPool(f=4, strides=4, channels=16),
    Flatten((2, 2, 16)),
    FullyConnected((2 * 2 * 16, 20), activation=sigmoid,
                   weight_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (2 * 2 * 16 + 20))),
    FullyConnected((20, 6), activation=linear,
                   weight_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (20 + 6)))
]

minibatch_size = 20
lr = 0.009
k = 2000
net = Network(layers, lr=lr, loss=cross_entropy)

num_epochs = 10
costs = []
m = X_train.shape[0]

for epoch in range(num_epochs):
    minibatch_cost = 0.
    num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
    minibatches = random_mini_batches(X_train, Y_train, minibatch_size)
    epoch_cost = 0
    for minibatch in minibatches:
        (minibatch_X, minibatch_Y) = minibatch
        net.train_step((minibatch_X, minibatch_Y))
        loss = np.sum(cross_entropy.compute((net.forward(minibatch_X), minibatch_Y)))
        print("cost minibatch %f" % loss)
from mnist_loader import load_data_wrapper
from neural_network import Network

training_data, validation_data, test_data = load_data_wrapper()

# Here is where we decide the number and size of each layer.
# For our test, the input layer MUST have 784 neurons and the output MUST have 10.
# Otherwise, you can add whatever layers you like.
# Examples:
#   [784, 100, 10]
#   [784, 30, 30, 10]
#   [784, 40, 30, 20, 10]
#   [784, 10]
net = Network([784, 30, 10])
net.SGD(
    training_data,
    epochs=30,            # Number of times we train on the entire dataset
    mini_batch_size=10,   # Number of examples used for each update
    eta=3.0,              # Learning rate
    test_data=test_data)
def UONoise():
    state = 0
    while True:
        yield state
        state += -THETA_UON * state + SIGMA_UON * np.random.randn()


# create a replay buffer for the history
history_buffer = ReplayBuffer(BUFFER_SIZE)

# create a neural network
# l1 and l2 are for regularization
Q = Network(LAYERS, output_function=lambda x: x, output_derivative=lambda x: 1,
            l1=L1, l2=L2, init_dist=W_B_INIT_DISTRIBUTION, flag=False)

# start an environment
env = gym.make('MountainCarContinuous-v0')
if IS_VIDEO:
    env = wrappers.Monitor(env, "./video", force=True, video_callable=lambda episode_id: True)

# discretize the space of possible actions
possible_actions = np.linspace(-1, 1, NUM_POSSIBLE_ACTIONS)
from neural_network import Network

# define
net = Network([2, 3, 3])
dataset = [
    ((0, 0), (0, 0, 0)),
    ((0, 1), (0, 1, 1)),
    ((1, 0), (0, 1, 1)),
    ((1, 1), (1, 1, 0)),
]

# teach
for net_error in net.teach_loop(dataset):
    print(net_error)

# evaluate results
while True:
    values = input("Inputs: ")
    while True:
        try:
            inputs = tuple(int(v) for v in values.split())
        except ValueError:
            pass
        else:
            break
    # eval
    outputs = net.feed_forward(inputs)
    print(outputs)
    print([round(o) for o in outputs])
# activation function and its derivative
def act(x):
    return np.tanh(x)

def act_prime(x):
    return 1 - np.tanh(x) ** 2

# loss function and its derivative
def loss(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def loss_prime(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size

# training data
x_train = np.array([[[0, 0]], [[0, 1]], [[1, 0]], [[1, 1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network
net = Network()
net.add(FCLayer((1, 2), (1, 3)))
net.add(ActivationLayer((1, 3), act, act_prime))
net.add(FCLayer((1, 3), (1, 1)))
net.add(ActivationLayer((1, 1), act, act_prime))

# train
net.use(loss, loss_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# test
out = net.predict(x_train)
print(out)
import numpy as np
from neural_network import Network
from matplotlib import pyplot as plt

m = 100
X = np.linspace(0, 1, m).reshape((m, 1))
y = np.array([np.exp(-np.sin(4 * np.pi * xx ** 3)) for xx in X]).reshape((m, 1))

N = 1
n = 1
nn = Network([n, 8, N],
             [None, 'sigmoid', 'identity'],
             [True, True, False],
             layer_weight_means_and_stds=[(0, 0.1), (0, 0.1)])

eta = 0.005

# Number of iterations to complete
N_iterations = 500000

batch_size = int(m / 10)

# Perform gradient descent
for i in range(N_iterations):
    # For stochastic gradient descent, take random samples of X and T
    batch = np.random.randint(0, m, size=batch_size)

    # Run the features through the neural net (to compute a and z)
    y_pred = nn.feed_forward(X)

    # Compute the gradient
    grad_w = nn._gradient_fun(X, y)
class Individual:
    def __init__(self):
        self.NN = Network(5, 1)
        self.alive = True
        self.reached_pilars = 0
        self.score = 0
        self.rect = pygame.Rect((int(WIDTH / 2), int(HEIGHT / 2)), (40, 40))
        self.movement_y = 0
        self.bool = True
        self.bool2 = True

    def draw(self, display):
        pygame.draw.rect(display, (0, 255, 0), self.rect, 1)

    def update(self):
        if self.alive:
            self.score += 1
            self.predict()
            self.movement_y += gravity
            self.rect.centery += self.movement_y
            self.check()
            VARIABLES.best_round_score = self.reached_pilars

    def check(self):
        global pipe_rect_list
        if self.rect.collidelist(pipe_rect_list) != -1:
            self.alive = False
        if pipe_rect_list[0].centerx - 10 < self.rect.centerx < pipe_rect_list[0].centerx + 10 and self.bool:
            self.bool = False
            self.reached_pilars += 1
        elif not pipe_rect_list[0].centerx - 10 < self.rect.centerx < pipe_rect_list[0].centerx + 10:
            self.bool = True
        if pipe_rect_list[2].centerx - 10 < self.rect.centerx < pipe_rect_list[1].centerx + 10 and self.bool2:
            self.bool2 = False
            self.reached_pilars += 1
        elif not pipe_rect_list[2].centerx - 10 < self.rect.centerx < pipe_rect_list[1].centerx + 10:
            self.bool2 = True
        if self.rect.bottom > HEIGHT or self.rect.top < 0:
            self.alive = False

    def jump(self):
        self.movement_y = -8

    def get_state(self):
        global pipe_list
        if pipe_list[0].rect_upper.right < self.rect.left and pipe_list[0].rect_upper.right < pipe_list[1].rect_upper.right:
            pipe = pipe_list[1]
        elif pipe_list[1].rect_upper.right < self.rect.left and pipe_list[1].rect_upper.right < pipe_list[0].rect_upper.right:
            pipe = pipe_list[0]
        else:
            pipe = pipe_list[0]
        return [self.rect.centery / HEIGHT,
                pipe.rect_upper.bottom / HEIGHT,
                pipe.rect_lower.top / HEIGHT,
                pipe.rect_lower.centerx / WIDTH,
                self.movement_y / 20]

    def pair(self, other):
        new_weight1 = self.NN.get_flatted()
        new_weight2 = other.NN.get_flatted()
        gene = random.randint(0, len(new_weight1) - 1)
        new_weight1[gene:], new_weight2[:gene] = new_weight2[gene:], new_weight1[:gene]
        return new_weight1, new_weight2

    def predict(self):
        output = self.NN.predict(self.get_state())
        if output < 0.5:
            self.jump()

    def reset(self):
        self.rect.centery = int(HEIGHT / 2)
        self.alive = True
        self.reached_pilars = 0
        self.score = 0
m = X.shape[0]
n = X.shape[1]
N = len(classes)

# Produce the one-hot matrix of classes
T = np.zeros((m, N))
for t, yi in zip(T, y):
    t[yi] = 1

# Instantiate a neural network
# First argument: number of nodes per layer
# Second argument: activation functions for each layer (layer 0 is always None)
# Third argument: whether to add a bias node
# Fourth argument: mean and standard deviation of the initial guess for the weights
nn = Network([n, 20, N],
             [None, 'sigmoid', 'softmax'],
             [True, True, False],
             layer_weight_means_and_stds=[(0, 0.1), (0, 0.1)])

# Learning rate
eta = 0.001

# Number of iterations to complete
N_iterations = 10000

# Perform gradient descent
for i in range(N_iterations):
    # For stochastic gradient descent, take random samples of X and T

    # Run the features through the neural net (to compute a and z)
    y_pred = nn.feed_forward(X)
from data import Database
from random import shuffle
from neural_network import Network, loss
from copy import deepcopy

db = Database("iris_data.txt")
nn = Network((4, 5, 3))
verbose = True
num_folds = 10
num_reports = 4


def n_fold(n):
    """Return a list of tuples of training and testing data for every run."""
    folds = list()
    training_size = len(db.rows) // n
    for fold in range(n):
        start = training_size * fold
        end = training_size * fold + training_size
        testing = db.rows[start:end]
        training = db.rows[:start] + db.rows[end:]
        folds.append((training, testing))
    return folds


def run(num_epoch, training, testing):
    # Find a better place for this stuff
    epochs_per_report = num_epoch // num_reports
    min_loss = 5000
def run(weights, number_of_hidden_layers, node_for_first_layer, node_for_hidden_layer,
        node_for_last_layer, max_steps, board_size=None, draw=False, generation=None):
    snake = Snake(board_size=board_size, generation=generation)
    score = 0
    game_score = 0
    direction = 0
    same_direction = 0
    left_collision, right_collision, front_collision, angle, distance = snake.return_variables()
    network = Network(number_of_hidden_layers, node_for_first_layer,
                      node_for_hidden_layer, node_for_last_layer, weights)
    if draw:
        snake.init_draw()
    snake.start_game()
    steps = 0
    while steps < max_steps:
        if not snake.playing:
            break
        # last_distance = distance
        last_score = game_score
        last_direction = direction
        network.set_first_layer([
            left_collision, right_collision, front_collision, angle, distance
        ])
        output = network.calculate_output()
        if output[0] >= output[1]:
            if output[0] >= output[2]:
                left_collision, right_collision, front_collision, angle, distance = snake.move("left")
                direction = 0
            else:
                left_collision, right_collision, front_collision, angle, distance = snake.move("forward")
                direction = 2
        elif output[1] >= output[2]:
            left_collision, right_collision, front_collision, angle, distance = snake.move("right")
            direction = 1
        else:
            left_collision, right_collision, front_collision, angle, distance = snake.move("forward")
            direction = 2
        game_score = snake.score
        if game_score > last_score:
            score += 1
            steps = 0
        if last_direction == direction:
            same_direction += 1
        else:
            same_direction = 0
        if same_direction > board_size:
            score = 0
            snake.playing = False
        if draw:
            snake.draw_game()
            sleep(0.02)
        steps += 1
    return score
model_now = 0
results = np.zeros([model_num, numToClassfy], dtype=int)

print("Loading MNIST data:")
images, label = load_data()
test_images, test_label = load_data_test()

# read the data in libsvm format
svm_label, svm_images = data_to_libsvm(images, label)
svm_test_label, svm_test_images = testdata_to_libsvm(test_images, test_label)

# binarize the data
images = np.where(images >= 128, 1, 0)
test_images = np.where(test_images >= 128, 1, 0)

network = Network(784, 2, [200, 10], 10)
network.startLearing(numToTrain, images, label, 1)
results[model_now] = network.test(test_images, test_label, numToClassfy)
model_now += 1

print("Training SVM 1...")
svm = Svm(svm_label, svm_images, svm_test_label, svm_test_images)
results[model_now] = svm.train(numToClassfy, numToTrain, "-q -m 1000")
model_now += 1

print("Training SVM 2...")
svm = Svm(svm_label, svm_images, svm_test_label, svm_test_images)
results[model_now] = svm.train(numToClassfy, numToTrain, "-q -m 1000 -t 3")
model_now += 1

knn = knn(10, images, label)