def update_model_weights(self, weights):
    self.model.set_weights(weights)
    self.searches = [
        MCTS(self.game, self.model, self.mcts_args)
        for _ in range(len(self.game.players))
    ]
    printl(f'{self.name}: Updated model weights')
def eval(self):
    games_with_rookie_first = self.training_args['EVAL_GAMES'] // 2
    games_with_champ_first = self.training_args['EVAL_GAMES'] - games_with_rookie_first
    wins, draws, losses = 0, 0, 0
    for _ in range(games_with_rookie_first):
        result = self.execute_episode([self.rookie, self.champ])
        if result == 1:
            wins += 1
        elif result == -1:
            losses += 1
        else:
            draws += 1
    for _ in range(games_with_champ_first):
        result = self.execute_episode([self.champ, self.rookie])
        if result == -1:
            wins += 1
        elif result == 1:
            losses += 1
        else:
            draws += 1
    if wins + losses == 0 or wins / (wins + losses) < self.training_args['PROMOTION_THRESHOLD']:
        printl(f'{self.name}: Rookie not promoted, get back to training')
        return False
    else:
        printl(f'{self.name}: Rookie promoted!')
        self.model.save_checkpoint(
            folder=self.training_args['CHECKPOINT_DIR'],
            filename=self.training_args['CHECKPOINT_PREFIX'] + str(self.generation),
        )
        return True
def extract(cps, cpm=False):
    if not cpm:
        with open(cps, "r") as cpsc:
            corpus = cpsc.read().replace("\n", "")
    else:
        corpus = cps
    if isinstance(corpus, list):
        print("corpus", " ".join(corpus))
    else:
        print("corpus", corpus)
        corpus = corpus.split(" ")
    tagged = NLP.Basic(st).tag(corpus)  # the tagger will do the tokenization
    print("tagged corpus: ", tagged)
    entities = create_entities(tagged)
    entity_index = list(map(lambda x: x.index, entities))
    characterizations = list(get_all_des(entities, entity_index, tagged))
    for chari in range(len(characterizations)):
        entities[chari].attributes = characterizations[chari]
    targets = list(add_target(entities, tagged))
    actions = list(filter(lambda x: type(x) is EC.Action, entities))
    for targ in range(len(targets)):
        actions[targ].target = targets[targ]
    utils.printl(entities)
    # characterizations index
    raw_characterizations = utils.flatten(characterizations)
    characterizations_indices = list(
        map(lambda x: x.index, raw_characterizations))
    relations = extract_relations(copy.deepcopy(tagged), entity_index,
                                  characterizations_indices)
    relation_index = list(map(lambda x: x.index, relations))
    utils.printl(relations)
    return construct_graph(entities, entity_index, relations, relation_index)
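# Usage sketch for extract() (hypothetical file name; grounded only in the
# signature above - `cpm=True` passes the corpus in memory instead of a path):
#
#   graph = extract('story.txt')                    # read corpus from disk
#   graph = extract('John loves Mary', cpm=True)    # corpus passed directly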
def train(self):
    training_sample = self.examples.sample_memories(
        self.training_args['TRAINING_SAMPLE_SIZE'])
    self.model.train(training_sample, self.training_args['BATCH_SIZE'],
                     self.training_args['EPOCHS'])
    self.generation += 1
    printl(f'{self.name}: Training for generation {self.generation} done')
def generate_training_examples(self):
    training_examples = []
    for _ in range(self.selfplay_args['GAMES_PER_SUBMISSION']):
        training_examples += self.execute_episode_with_example_storing()
    printl(
        f'{self.name}: Finished episode generation {self.submission_count + 1} '
        f'with {len(training_examples)} examples - looking to submit examples'
    )
    return training_examples
def save_checkpoint(self, folder, filename):
    filepath = os.path.join(folder, filename)
    if not os.path.exists(folder):
        printl(f'Checkpoint dir does not exist - making directory {folder}')
        os.mkdir(folder)
    else:
        printl('Checkpoint dir exists')
    self.nnet.save(filepath)
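# A minimal counterpart sketch for loading. This is an assumption, not the
# repo's actual method: it presumes `self.nnet` is a Keras model (it is saved
# above with `self.nnet.save`) and that `tensorflow` is imported as `tf`.
def load_checkpoint(self, folder, filename):
    filepath = os.path.join(folder, filename)
    # Fail loudly rather than silently training from scratch.
    assert os.path.exists(filepath), f'No checkpoint at {filepath}'
    self.nnet = tf.keras.models.load_model(filepath)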
def execute_episode_with_example_storing(self):
    state = self.game.get_initial_state()
    current_player = 1
    turn_count = 0
    training_examples = []
    start_time = time.time()
    actions = []
    while not self.game.get_is_terminal_state(state):
        canonical_state = self.game.get_canonical_form(state, current_player)
        probs = self.searches[current_player - 1].get_probs(
            canonical_state, temperature=1)
        if turn_count >= self.selfplay_args['DETERMINISTIC_PLAY']:
            # Past the exploration window, play the search's best move.
            action = np.argmax(probs)
        else:
            # Early in the game, sample from the search distribution.
            action = np.random.choice(range(len(probs)), p=probs)
        assert self.game.get_allowed_actions(state)[action], \
            f'Must choose an allowed action!\nChose {action} in state\n{canonical_state}'
        symmetries = self.game.get_symmetries(canonical_state, probs)
        for s, p in symmetries:
            training_examples.append([s, p, current_player])
        previous_player = current_player
        state, current_player = self.game.get_next_state(
            state, current_player, action)
        turn_count += 1
        actions.append(str(action))
    self.episode_count += 1
    result = self.game.get_result(state, current_player)
    if result == 0:
        printed_result = 'draw'
    else:
        printed_result = f'P{3 - current_player} wins'
    printl(
        f'{self.name}: Episode {self.episode_count} completed in '
        f'{round(time.time() - start_time, 2)}s with {turn_count} actions '
        f'and generating {len(training_examples)} examples.'
        f'\nActions taken were {", ".join(actions)}.'
        f'\nResult was {printed_result}\n{state}'
    )
    # Relabel each example with the result from that player's perspective.
    training_examples = [(x[0], x[1], result * ((-1)**(x[2] != previous_player)))
                         for x in training_examples]
    for search in self.searches:
        search.reset()
    return training_examples
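# Standalone worked example of the value-flip convention used above: examples
# stored from the last mover's perspective keep `result`, the opponent's get
# `-result`. All values here are illustrative.
result, previous_player = 1, 1  # player 1 made the winning move
stored = [('state_a', 'probs_a', 1), ('state_b', 'probs_b', 2)]
values = [result * ((-1) ** (player != previous_player))
          for _, _, player in stored]
assert values == [1, -1]  # player 1's example gets +1, player 2's gets -1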
def receive_examples(self):
    received_count = 0
    while received_count < self.training_args['MAX_EXAMPLES_PER_RECEIVE']:
        # Blocking get: waits until a selfplay process submits a batch.
        memories = self.example_ingress.get()
        self.examples.store_memories(memories)
        received_count += 1
    memory_usage, memory_capacity = self.examples.get_memory_usage()
    printl(f'{self.name}: Received examples. Memory usage {memory_usage}/{memory_capacity}.')
def sender(ip, port):
    if isinstance(port, str):
        port = int(port)
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server_address = (ip, port)
    printl('connecting to %s port %s' % server_address)
    sock.connect(server_address)
    while True:
        try:
            # Send data read from stdin
            message = input()
            msg = bytes(message, 'utf-8')
            sock.sendall(msg)
        except Exception as e:
            printl(e)
def receiver(ip, port):
    if isinstance(port, str):
        port = int(port)
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server_addr = (ip, port)
    printl('starting server on %s port %s: ' % server_addr)
    sock.bind(server_addr)
    sock.listen(1)
    while True:
        # wait for connection
        printl('waiting for connection ...')
        conn, client_addr = sock.accept()
        try:
            printl(f'got connection from {client_addr}')
            while True:
                data = conn.recv(1024)
                if not data:
                    # Empty read means the client disconnected; accept again.
                    break
                # user_name is expected to be defined at module level
                printl(str(user_name) + ' -> ' + data.decode('utf-8'))
        except Exception as e:
            printl(e)
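# Hypothetical wiring for the two halves above (host and port are
# illustrative, not from the source): start the receiver on one machine,
# then point the sender at it from another shell.
#
#   receiver('0.0.0.0', 5000)        # terminal 1: listen and print messages
#   sender('192.168.0.10', 5000)     # terminal 2: type lines to send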
def set_up_processes(name, main_args, mcts_args, nnet_args, selfplay_args,
                     training_args, m, n, k):
    printl(f'{name}: Setting up processes')
    game = Game(m, n, k)
    kill_ingress = Queue(1)
    example_ingress = Queue()
    weight_egresses = [
        Queue() for _ in range(main_args['NUM_OF_SELFPLAY_PROCESSES'])
    ]
    training_process = TrainingProcess(
        name='TP0',
        gpu_id='0' if main_args['USE_GPUS'] and main_args['NUM_OF_GPUS'] else None,
        game=game,
        lazy_model_module=LAZY_MODEL_NAME,
        model_args=(game, nnet_args),
        example_ingress=example_ingress,
        weight_egresses=weight_egresses,
        kill_ingress=kill_ingress,
        training_args=training_args,
        mcts_args=mcts_args,
        selfplay_args=selfplay_args,
    )
    printl(f'{name}: Training process set up')
    if main_args['USE_GPUS'] and main_args['NUM_OF_GPUS']:
        # GPU 0 is reserved for training; spread the rest over selfplay.
        selfplay_gpu_ids = [str(i) for i in range(1, main_args['NUM_OF_GPUS'])]
        while len(selfplay_gpu_ids) < main_args['NUM_OF_SELFPLAY_PROCESSES']:
            selfplay_gpu_ids.append(None)
    else:
        selfplay_gpu_ids = [
            None for _ in range(main_args['NUM_OF_SELFPLAY_PROCESSES'])
        ]
    selfplay_processes = []
    for i, gpu_id, weight_egress in zip(
            range(main_args['NUM_OF_SELFPLAY_PROCESSES']),
            selfplay_gpu_ids, weight_egresses):
        time.sleep(1)
        selfplay_processes.append(
            SelfplayProcess(
                name=f'SP{i}',
                gpu_id=gpu_id,
                game=game,
                lazy_model_module=LAZY_MODEL_NAME,
                model_args=(game, nnet_args),
                mcts_args=mcts_args,
                selfplay_args=selfplay_args,
                weight_ingress=weight_egress,
                example_egress=example_ingress,
                kill_ingress=kill_ingress,
            ))
    printl(f'{name}: Selfplay processes set up')
    return training_process, selfplay_processes, kill_ingress
def run(self):
    printl(f'{self.name}: Selfplay process started')
    if self.gpu_id is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.gpu_id)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    printl(f'{self.name}: CUDA_VISIBLE_DEVICES set to {self.gpu_id}')
    self.lazy_model_module = LazyLoader(self.lazy_model_module, globals(),
                                        self.lazy_model_module)
    ModelClass = self.lazy_model_module.ExportedModel
    # Per-process seed so selfplay workers don't replay identical games;
    # named `seed` to avoid shadowing the `time` module.
    seed = (int(datetime.datetime.now().timestamp() * 1e6)**10) % 2**32
    printl(f'{self.name}: Setting seed to {seed}')
    np.random.seed(seed)
    self.model = ModelClass(*self.model_args)
    self.searches = [
        MCTS(self.game, self.model, self.mcts_args)
        for _ in range(len(self.game.players))
    ]
    printl(f'{self.name}: Set up model and searches')
    printl(f'{self.name}: Awaiting initial weights')
    weights = self.weight_ingress.get()
    printl(f'{self.name}: Found initial weights')
    self.update_model_weights(weights)
    printl(f'{self.name}: Initial weights loaded')
    while self.kill_ingress.empty():
        if not self.weight_ingress.empty():
            weights = self.weight_ingress.get()
            printl(f'{self.name}: Found weights update')
            self.update_model_weights(weights)
            printl(f'{self.name}: Weights updated')
        else:
            printl(f'{self.name}: No weights update, continuing')
        printl(f'{self.name}: Starting example submission {self.submission_count + 1}')
        self.example_egress.put(self.generate_training_examples())
        self.submission_count += 1
        printl(f'{self.name}: Finished example submission {self.submission_count}')
    printl(f'{self.name}: Selfplay process killed - process ending')
def output_weights(self):
    for egress in self.weight_egresses:
        egress.put(self.model.get_weights())
    printl(f'{self.name}: Outputted weights to selfplay processes')
def run(self):
    printl(f'{self.name}: Training process started')
    if self.gpu_id is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.gpu_id)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    printl(f'{self.name}: CUDA_VISIBLE_DEVICES set to {self.gpu_id}')
    self.lazy_model_module = LazyLoader(self.lazy_model_module, globals(),
                                        self.lazy_model_module)
    ModelClass = self.lazy_model_module.ExportedModel
    self.model = ModelClass(*self.model_args)
    # Named `seed` to avoid shadowing the `time` module.
    seed = int(datetime.datetime.now().timestamp() * 1e6) % 2**32
    printl(f'{self.name}: Setting seed to {seed}')
    np.random.seed(seed)
    prev_checkpoint = self.training_args.get('PREVIOUS_CHECKPOINT', None)
    if prev_checkpoint is not None:
        self.model.load_checkpoint(*prev_checkpoint)
    self.champ = self.model
    self.rookie = self.model
    can_train = False
    self.generation = 0
    printl(f'{self.name}: Outputting initial weights')
    self.output_weights()
    while True:
        if not self.kill_ingress.empty():
            break
        for _ in range(self.training_args['TRAINING_ROUNDS_PER_EVAL']):
            if not self.kill_ingress.empty():
                break
            self.receive_examples()
            if can_train:
                self.train()
            elif (self.examples.get_memory_usage()[0] /
                  self.examples.get_memory_usage()[1]
                  > self.training_args['START_TRAINING_THRESHOLD']):
                can_train = True
                printl(f'{self.name}: Memory usage acceptable, can start training')
                self.train()
        if not self.kill_ingress.empty():
            break
        if can_train:
            improved = self.eval()
            if improved:
                printl(f'{self.name}: We now have generation {self.generation} of our model')
                self.output_weights()
    printl(f'{self.name}: Training process killed')
    self.model.save_checkpoint(
        folder=self.training_args['CHECKPOINT_DIR'],
        filename=self.training_args['CHECKPOINT_PREFIX'] + 'final',
    )
    printl(f'{self.name}: Final checkpoint saved - process ending')
def __init__(self, use_set='train', resume_itr=0, config={}):
    """
    Args:
        use_set: which split to draw tasks from ('train', 'val', 'test', ...)
        resume_itr: iteration to resume from (offsets the sampling indices)
        config: dict of task settings; 'kshot' is the number of samples per
            class per batch to train on, 'kquery' the number to test on
    """
    self.config = config
    self.use_set = use_set
    self.crosswise = config.get('crosswise')
    if self.crosswise:
        if self.use_set == 'train':
            self.start = 0
            self.each_class = 480
            self.use_set += '_cross'
        elif self.use_set == 'val':
            self.start = 480
            self.each_class = 120
            self.use_set += '_cross'
    else:
        self.start = 0
        self.each_class = 600
    printl('use_set:' + self.use_set)
    self.kshot = config.get('kshot')
    self.kquery = config.get('kquery')
    self.meta_batch_size = config.get('meta_batch_size', 1)
    self.resume_index = resume_itr * self.meta_batch_size
    self.resume_valindex = (
        (resume_itr - 1) // config.get('save_iter', 1000) + 1) * self.meta_batch_size
    self.num_samples_per_class = self.kshot + self.kquery
    self.num_classes = config.get('num_classes', 5)
    self.num_shot_per_task = self.kshot * self.num_classes
    self.num_query_per_task = self.kquery * self.num_classes
    self.img_size = config.get('img_size', 84)
    normalize = T.Normalize(np.array([0.485, 0.456, 0.406]),
                            np.array([0.229, 0.224, 0.225]))
    # Alternative normalization (disabled): T.Normalize([0.5]*3, [0.5]*3)
    if config.get('train'):
        transform1 = T.Compose([
            T.RandomHorizontalFlip(),
            T.Resize(100),
            T.RandomResizedCrop(self.img_size),
        ])
        transform2 = T.Compose([
            T.ColorJitter(0.4, 0.4, 0.4),
            T.ToTensor(),
            Lighting(0.1),
            normalize,
        ])
    else:
        transform1 = T.Compose([
            T.Resize(100),
            T.CenterCrop(self.img_size),
        ])
        transform2 = T.Compose([
            T.ToTensor(),
            normalize,
        ])
    self.transform = T.Compose([transform1, transform2])
    self.data_folder = config.get('data_folder', '../../DataSets/mini-imagenet')
    self.len_fname = len(self.data_folder) + 1
    self.tasks_file = ('miniimagenet_' + str(self.num_classes) + 'way_' +
                       str(self.kshot) + 'shot_' + str(self.kquery) + 'query_')
    self.train_folders = [
        os.path.join(self.data_folder, 'train', family)
        for family in os.listdir(os.path.join(self.data_folder, 'train'))
        if os.path.isdir(os.path.join(self.data_folder, 'train', family))
    ]
    self.test_folders = [
        os.path.join(self.data_folder, 'test', family)
        for family in os.listdir(os.path.join(self.data_folder, 'test'))
        if os.path.isdir(os.path.join(self.data_folder, 'test', family))
    ]
    self.val_folders = [
        os.path.join(self.data_folder, 'val', family)
        for family in os.listdir(os.path.join(self.data_folder, 'val'))
        if os.path.isdir(os.path.join(self.data_folder, 'val', family))
    ]
    if self.use_set == 'train_plus_val' or self.use_set == 'train_cross':
        self.meta_character_folders = self.train_folders + self.val_folders
        self.data_num = 400000
    elif self.use_set == 'val_cross':
        self.meta_character_folders = self.train_folders + self.val_folders
        self.data_num = 1000
    elif self.use_set == 'train':
        self.meta_character_folders = self.train_folders
        self.data_num = 400000
    elif self.use_set == 'test':
        self.meta_character_folders = self.test_folders
        self.data_num = 1000
    elif self.use_set == 'val':
        self.meta_character_folders = self.val_folders
        self.data_num = 1000
    else:
        raise ValueError('Unrecognized data source')
    self.tasks_file = self.tasks_file + self.use_set + '.pkl'
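# Hypothetical construction sketch. The class name `DataGenerator` and every
# config value below are illustrative assumptions, not repo defaults:
#
#   config = {
#       'kshot': 1, 'kquery': 15, 'num_classes': 5, 'meta_batch_size': 4,
#       'img_size': 84, 'data_folder': '../../DataSets/mini-imagenet',
#       'train': True,
#   }
#   train_data = DataGenerator(use_set='train', resume_itr=0, config=config)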
if __name__ == '__main__':
    printl('Main: Program beginning')
    mp.set_start_method('spawn', force=True)
    training_process, selfplay_processes, kill_ingress = set_up_processes(
        'Main', MAIN_ARGS, MCTS_ARGS, NNET_ARGS, SELFPLAY_ARGS, TRAINING_ARGS,
        m, n, k)
    printl('Main: Processes created')
    printl('Main: Starting training process')
    training_process.start()
    printl('Main: Starting selfplay processes')
    for proc in selfplay_processes:
        proc.start()
    printl('Main: Main process sleeping until kill time')
    time.sleep(MAIN_ARGS['RUNNING_TIME'])
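# A plausible shutdown sequence after the sleep (a sketch, not from the
# source), assuming the worker loops poll `kill_ingress.empty()` as above,
# so a single sentinel on the size-1 queue unblocks every process:
#
#   kill_ingress.put(True)
#   training_process.join()
#   for proc in selfplay_processes:
#       proc.join()
#   printl('Main: All processes joined - exiting')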