def run(self):
    """Segment the source text with every strategy, score each one, and render.

    A ``TextDataset`` is built from ``self.src``. The NLTK sentence split is
    used as the baseline, then three rule-based filterings of that baseline
    (subject, verb, subject followed by verb) are scored the same way. One
    result dict per strategy is appended to ``self.results`` in that order,
    and finally everything is passed to ``Render`` and ``render.save()`` is
    called.
    """
    # get data
    data = TextDataset(self.src)

    # preprocess
    r = RuleBased()
    e = Evaluate()

    def record(key, sentences):
        """Index `sentences` in the token stream, score them, store the result."""
        query_list = [s.split(" ") for s in sentences]
        begin, end, indexed_sentences = e.find_query_index(data.clean_idx_tokens, query_list)
        evaluation = e.evaluate(data, begin, end)
        self.results.append({
            "key": key,
            "begin": begin,
            "end": end,
            "sentences": indexed_sentences,
            "evaluation": evaluation,
        })

    # nltk as baseline
    print("Processsing NLTK as baseline ... ")
    nltk_sentences = r.get_nltk_tokenized_sentences(data.clean_text)
    record("nltk", nltk_sentences)

    # rule_based methods (each one filters the NLTK baseline sentences)
    print("Processsing rule based (Subject) ... ")
    record("has_subject", r.filter_subject(nltk_sentences))

    print("Processsing rule based (Verb)... ")
    record("has_verb", r.filter_verb(nltk_sentences))

    print("Processsing rule based (Subject & Verb)... ")
    record("has_subjectVerb", r.filter_verb(r.filter_subject(nltk_sentences)))

    # write result
    print("Writing data to: " + str(self.dst) + "\033[1m" + str(self.filename) + "\033[0m")
    render = Render(self.dst, self.filename, data, data.ground_truth, self.results)
    render.save()
def start(self):
    """Main training loop: self-play, train, then evaluate, once per iteration."""
    for iteration in range(1, self.num_iterations + 1):
        print("Iteration", iteration)

        # Collects the self play states, pis and vs of this iteration.
        training_data = []

        for game_number in range(1, self.num_games + 1):
            print("Start Training Self-Play Game", game_number)
            # Each self-play game runs on its own fresh clone.
            game = self.game.clone()
            self.play_game(game, training_data)
            print(game.evaluate())
            self.scores.append(game.evaluate())

        # Save the network as it is before training, and load that saved
        # model into the evaluator network.
        self.net.save_model()
        self.eval_net.load_model()

        # Train the network on the self play data gathered above.
        self.net.train(training_data)

        # One MonteCarloTreeSearch per network, handed to the evaluator.
        # (The original author marked this evaluation step "TO BE COMPLETED !".)
        evaluator = Evaluate(
            current_mcts=MonteCarloTreeSearch(self.net),
            eval_mcts=MonteCarloTreeSearch(self.eval_net),
            game=self.game,
        )
        wins, losses = evaluator.evaluate()
        print("wins:", wins)
        print("losses:", losses)

        # Win rate is defined as 0 when wins + losses is 0.
        decided_games = wins + losses
        win_rate = 0 if decided_games == 0 else wins / decided_games
        print("win rate:", win_rate)

        accepted = win_rate > self.eval_win_rate
        if accepted:
            # Keep the freshly trained network as the best model.
            print("New model saved as best model.")
            self.net.save_model("best_model")
        else:
            # Drop the freshly trained weights and reload the saved model.
            print("New model discarded and previous model loaded.")
            self.net.load_model()
def start(self):
    """Main training loop, driven by the iteration and game counts in ``CFG``."""
    for iteration in range(1, CFG.num_iterations + 1):
        print("Iteration", iteration)

        # Collects the self play states, pis and vs of this iteration.
        training_data = []

        for game_number in range(1, CFG.num_games + 1):
            print("Start Training Self-Play Game", game_number)
            # Each self-play game runs on its own fresh clone.
            game = self.game.clone()
            self.play_game(game, training_data)

        # Save the network as it is before training, and load that saved
        # model into the evaluator network.
        self.net.save_model()
        self.eval_net.load_model()

        # Train the network on the self play data gathered above.
        self.net.train(training_data)

        # One MonteCarloTreeSearch per network, handed to the evaluator.
        evaluator = Evaluate(
            current_mcts=MonteCarloTreeSearch(self.net),
            eval_mcts=MonteCarloTreeSearch(self.eval_net),
            game=self.game,
        )
        wins, losses = evaluator.evaluate()
        print("wins:", wins)
        print("losses:", losses)

        # Win rate is defined as 0 when wins + losses is 0.
        decided_games = wins + losses
        win_rate = 0 if decided_games == 0 else wins / decided_games
        print("win rate:", win_rate)

        accepted = win_rate > CFG.eval_win_rate
        if accepted:
            # Keep the freshly trained network as the best model.
            print("New model saved as best model.")
            self.net.save_model("best_model")
        else:
            # Drop the freshly trained weights and reload the saved model.
            print("New model discarded and previous model loaded.")
            self.net.load_model()
class Segmenter(object):
    """Orchestrate data preparation, training, evaluation and HDFS transfers.

    All paths and hyper-parameters are read from ``flags``; every remote
    transfer goes through ``hdfs_client`` (``hdfs_download``, ``hdfs_upload``,
    ``hdfs_delete``, ``hdfs_mv``).
    """

    # config.json keys that override the attribute of the same name on flags.
    _CONFIG_KEYS = (
        'use_lstm',
        'use_dynamic_rnn',
        'use_bidirectional_rnn',
        'vocab_drop_limit',
        'batch_size',
        'num_steps',
        'num_layer',
        'embedding_size',
        'learning_rate',
        'learning_rate_decay_factor',
        'keep_prob',
        'clip_norm',
    )

    def __init__(self, hdfs_client, flags):
        # Polled by log_monitor(); nothing in this class sets it to True, so
        # the caller is presumably expected to toggle it around training.
        self.train_is_alive = False
        self.hdfs_client = hdfs_client
        self.flags = flags
        self.data_utils = DataUtils()

    def update_config(self):
        """Override flags with the values found in ``<raw_data_path>/config.json``.

        Every recognised key present in the file is applied. (Previously the
        keys were tested in an ``if/elif`` chain, so only the first key found
        took effect and all others were silently ignored.)

        Raises:
            Exception: if the file cannot be read or parsed; the underlying
                error is attached as ``__cause__``.
        """
        config_path = os.path.join(self.flags.raw_data_path, 'config.json')
        try:
            with open(config_path, encoding='utf-8', mode='r') as data_file:
                config_json = json.load(data_file)
            for key in self._CONFIG_KEYS:
                if key in config_json:
                    setattr(self.flags, key, config_json[key])
        except Exception as err:
            # Same exception type and message as before, but the root cause
            # is kept and KeyboardInterrupt/SystemExit are no longer trapped.
            raise Exception('ERROR: config.json content invalid') from err

    def train(self):
        """Download the corpora, build vocabulary and TFRecords, then train."""
        datasets_path = self.flags.datasets_path

        # Fetch the raw train/test files from HDFS.
        self.hdfs_client.hdfs_download(
            os.path.join(self.flags.input_path, 'train.txt'),
            os.path.join(datasets_path, 'train.txt'))
        self.hdfs_client.hdfs_download(
            os.path.join(self.flags.input_path, 'test.txt'),
            os.path.join(datasets_path, 'test.txt'))

        # raw text -> labelled text -> split text, for both corpora.
        self.data_utils.label_segment_file(
            os.path.join(datasets_path, 'train.txt'),
            os.path.join(datasets_path, 'label_train.txt'))
        self.data_utils.label_segment_file(
            os.path.join(datasets_path, 'test.txt'),
            os.path.join(datasets_path, 'label_test.txt'))
        self.data_utils.split_label_file(
            os.path.join(datasets_path, 'label_train.txt'),
            os.path.join(datasets_path, 'split_train.txt'))
        self.data_utils.split_label_file(
            os.path.join(datasets_path, 'label_test.txt'),
            os.path.join(datasets_path, 'split_test.txt'))

        # The vocabulary is built from the training split only and is then
        # used to convert both splits to ids.
        words_vocab, labels_vocab = self.data_utils.create_vocabulary(
            os.path.join(datasets_path, 'split_train.txt'),
            self.flags.vocab_path, self.flags.vocab_drop_limit)
        train_word_ids_list, train_label_ids_list = self.data_utils.file_to_word_ids(
            os.path.join(datasets_path, 'split_train.txt'),
            words_vocab, labels_vocab)
        test_word_ids_list, test_label_ids_list = self.data_utils.file_to_word_ids(
            os.path.join(datasets_path, 'split_test.txt'),
            words_vocab, labels_vocab)

        tensorflow_utils = TensorflowUtils()
        tensorflow_utils.create_record(
            train_word_ids_list, train_label_ids_list,
            os.path.join(self.flags.tfrecords_path, 'train.tfrecords'))
        tensorflow_utils.create_record(
            test_word_ids_list, test_label_ids_list,
            os.path.join(self.flags.tfrecords_path, 'test.tfrecords'))

        # Publish the vocabulary next to the other outputs before training.
        self.hdfs_client.hdfs_upload(
            self.flags.vocab_path,
            os.path.join(self.flags.output_path,
                         os.path.basename(self.flags.vocab_path)))

        train = Train()
        train.train()

    def upload_tensorboard(self):
        """Replace the TensorBoard directory on HDFS with the local one.

        The local directory is uploaded under a ``-temp`` name first and moved
        into place after the old copy has been deleted.
        """
        hdfs_tensorboard_path = os.path.join(
            self.flags.output_path,
            os.path.basename(os.path.normpath(self.flags.tensorboard_path)))
        temp_hdfs_tensorboard_path = hdfs_tensorboard_path + '-temp'
        self.hdfs_client.hdfs_upload(self.flags.tensorboard_path,
                                     temp_hdfs_tensorboard_path)
        self.hdfs_client.hdfs_delete(hdfs_tensorboard_path)
        self.hdfs_client.hdfs_mv(temp_hdfs_tensorboard_path,
                                 hdfs_tensorboard_path)

    def log_monitor(self):
        """Upload the TensorBoard logs every 120 seconds while training is alive."""
        while self.train_is_alive:
            time.sleep(120)
            self.upload_tensorboard()

    def upload_model(self):
        """Export the saved model, then replace checkpoint and model on HDFS.

        Both directories are uploaded under ``-temp`` names first; the old
        copies are deleted and the temporary ones moved into place.
        """
        predict = Predict()
        predict.saved_model_pb()

        hdfs_checkpoint_path = os.path.join(
            self.flags.output_path,
            os.path.basename(os.path.normpath(self.flags.checkpoint_path)))
        hdfs_saved_model_path = os.path.join(
            self.flags.output_path,
            os.path.basename(os.path.normpath(self.flags.saved_model_path)))
        temp_hdfs_checkpoint_path = hdfs_checkpoint_path + '-temp'
        temp_hdfs_saved_model_path = hdfs_saved_model_path + '-temp'

        self.hdfs_client.hdfs_upload(self.flags.checkpoint_path,
                                     temp_hdfs_checkpoint_path)
        self.hdfs_client.hdfs_upload(self.flags.saved_model_path,
                                     temp_hdfs_saved_model_path)
        self.hdfs_client.hdfs_delete(hdfs_checkpoint_path)
        self.hdfs_client.hdfs_delete(hdfs_saved_model_path)
        self.hdfs_client.hdfs_mv(temp_hdfs_checkpoint_path,
                                 hdfs_checkpoint_path)
        self.hdfs_client.hdfs_mv(temp_hdfs_saved_model_path,
                                 hdfs_saved_model_path)

    def evaluate(self):
        """Download vocabulary, checkpoint and test set, predict, and score.

        The local vocabulary and checkpoint directories are removed first and
        re-downloaded from ``input_path``. The evaluation report
        ``test_evaluate.txt`` is uploaded to ``output_path``.
        """
        # Start from clean local copies; rmtree raises if a path is missing.
        shutil.rmtree(self.flags.vocab_path)
        shutil.rmtree(self.flags.checkpoint_path)

        self.hdfs_client.hdfs_download(
            os.path.join(self.flags.input_path,
                         os.path.basename(self.flags.vocab_path)),
            self.flags.vocab_path)
        self.hdfs_client.hdfs_download(
            os.path.join(self.flags.input_path, 'test.txt'),
            os.path.join(self.flags.datasets_path, 'test.txt'))
        hdfs_checkpoint_path = os.path.join(
            self.flags.input_path,
            os.path.basename(self.flags.checkpoint_path))
        self.hdfs_client.hdfs_download(hdfs_checkpoint_path,
                                       self.flags.checkpoint_path)

        self.data_utils.label_segment_file(
            os.path.join(self.flags.datasets_path, 'test.txt'),
            os.path.join(self.flags.datasets_path, 'label_test.txt'))
        self.data_utils.split_label_file(
            os.path.join(self.flags.datasets_path, 'label_test.txt'),
            os.path.join(self.flags.datasets_path, 'split_test.txt'))

        predict = Predict()
        predict.file_predict(
            os.path.join(self.flags.datasets_path, 'split_test.txt'),
            os.path.join(self.flags.datasets_path, 'test_predict.txt'))

        self.model_evaluate = Evaluate()
        self.model_evaluate.evaluate(
            os.path.join(self.flags.datasets_path, 'test_predict.txt'),
            os.path.join(self.flags.datasets_path, 'test_evaluate.txt'))

        # The stale report is deleted from output_path, so the new one has to
        # be uploaded to that same location. It used to be uploaded to
        # input_path, leaving output_path without a report.
        hdfs_report_path = os.path.join(self.flags.output_path,
                                        'test_evaluate.txt')
        self.hdfs_client.hdfs_delete(hdfs_report_path)
        self.hdfs_client.hdfs_upload(
            os.path.join(self.flags.datasets_path, 'test_evaluate.txt'),
            hdfs_report_path)
class Search:
    """Principal variation search with iterative deepening for one position.

    Moves are integers. As used throughout this class, bits 0-5 hold the
    destination square, bits 6-11 the origin square, and bits 14-15 a move
    type compared against ``PROMOTION`` / ``CASTLING``; bits 12-13 are read
    only for promotions (presumably the promotion piece - confirm against
    the move encoder).
    """

    # Minimum number of visited nodes between two wall-clock checks.
    TIME_CHECK_INTERVAL = 1024

    def __init__(self, position):
        self.position = position

        # Initialise transposition table
        self.TT_SIZE = 2 ** 16
        self.tt = [None] * self.TT_SIZE

        # Used for move ordering with the killer heuristic
        # Indexed by ply; two killer slots per ply
        self.killers = [[None for _ in range(2)] for _ in range(50)]

        # Used for move ordering with the history heuristic
        # Indexed by colour, start square, and end square
        self.history = [[[0 for _ in range(64)] for _ in range(64)] for _ in range(2)]

        # Keeps track of node count during the search
        self.node_count = 0
        # Node count at which the clock is consulted next
        self._next_time_check = self.TIME_CHECK_INTERVAL

        # Keeps track of time during the search
        self.start_time = None
        self.time_limit = None

        self.eval = Evaluate()

    def search_moves(self, gen_type, hash_move=None, killers=None):
        """Yield legal moves in search order.

        Order: hash move, check evasions (``EVASIONS``), captures sorted by
        MVV/LVA (``CAPTURES``/``ALL``), killer moves, then quiet moves sorted
        by history score (``QUIETS``/``ALL``).

        NOTE(review): the hash move and the killers are not removed from the
        later generated lists, so a move can be yielded more than once.
        """
        # Search hash move first
        if hash_move and self.position.is_pseudo_legal(hash_move) and self.position.is_legal(hash_move):
            yield hash_move

        # Use specialised check evasion generator
        if gen_type == EVASIONS:
            evasions = self.position.get_check_evasions(self.position.colour)
            for move in evasions:
                yield move

        # Search captures before quiet moves
        if gen_type == CAPTURES or gen_type == ALL:
            captures = self.position.get_pseudo_legal_moves(CAPTURES)
            # Order captures by MVV/LVA: most valuable victim first (a
            # promotion adds the value looked up from bits 12-13), ties
            # broken by the least valuable piece on the origin square.
            captures = sorted(
                captures,
                key=lambda x: (
                    -(MATERIAL[((x >> 12) & 0x3) + 2][MIDGAME]
                      + MATERIAL[self.position.squares[x & 0x3F] & 7][MIDGAME])
                    if x & (0x3 << 14) == PROMOTION
                    else -MATERIAL[self.position.squares[x & 0x3F] & 7][MIDGAME],
                    MATERIAL[self.position.squares[(x >> 6) & 0x3F] & 7][MIDGAME],
                ),
            )
            # Return legal captures
            for move in captures:
                if self.position.is_legal(move):
                    yield move

        # Search killer moves next
        if killers:
            killer1 = killers[0]
            if killer1:
                if killer1 != hash_move and self.position.is_pseudo_legal(killer1) and self.position.is_legal(killer1):
                    yield killer1
            killer2 = killers[1]
            if killer2:
                if killer2 != hash_move and self.position.is_pseudo_legal(killer2) and self.position.is_legal(killer2):
                    yield killer2

        # Search quiet moves last
        if gen_type == QUIETS or gen_type == ALL:
            quiets = self.position.get_pseudo_legal_moves(QUIETS)
            # Order quiet moves by history heuristic
            quiets = sorted(
                quiets,
                key=lambda x: -self.history[self.position.colour][(x >> 6) & 0x3F][x & 0x3F])
            # Return legal quiet moves
            for move in quiets:
                if self.position.is_legal(move):
                    yield move

    def tt_store(self, index, zobrist, move, depth, score, type_):
        """Overwrite transposition table slot `index` with a new entry."""
        self.tt[index] = TTEntry(zobrist, move, depth, score, type_)

    def _time_expired(self):
        """Return True when the time limit has passed, checking the clock rarely.

        The clock is read at most once per ``TIME_CHECK_INTERVAL`` visited
        nodes. The old test ``node_count & 1024`` was true for 1024
        consecutive counts out of every 2048 rather than once per 1024 nodes.
        A threshold is used instead of an exact-multiple test because
        ``node_count`` also advances inside ``quiescence``, where no check
        runs, so an exact multiple could be stepped over.
        """
        if self.node_count < self._next_time_check:
            return False
        self._next_time_check = self.node_count + self.TIME_CHECK_INTERVAL
        return time.time() - self.start_time > self.time_limit

    # Main search algorithm (Principal Variation Search)
    def pvs(self, alpha, beta, depth, ply=0):
        """Return the score of the current position within the (alpha, beta) window.

        Raises:
            SearchStoppedException: when the time limit set by
                ``iter_search`` has been exceeded.
        """
        self.node_count += 1
        is_pv_node = alpha != beta - 1

        # Clear killer moves for child nodes as a new sibling node is entered
        self.killers[ply + 1][0] = None
        self.killers[ply + 1][1] = None

        # Check for threefold repetition
        rep_count = 0
        for zobrist in self.position.repetition_stack[-self.position.halfmove_clock - 1:]:
            if zobrist == self.position.zobrist:
                rep_count += 1
                if rep_count >= 2:
                    return DRAW

        # Check for fifty-move rule
        if self.position.halfmove_clock >= 100:
            return DRAW

        hash_move = None
        # Transposition table index is obtained from the first 16 bits of the zobrist key
        tt_index = self.position.zobrist & 0xFFFF
        tt_entry = self.tt[tt_index]

        # If there is an existing entry, get the hash move and return the score if applicable
        # Return the hashed score if it is exact or tightens the current alpha-beta bounds
        if tt_entry and tt_entry.zobrist == self.position.zobrist:
            hash_move = tt_entry.move
            if tt_entry.depth >= depth:
                entry_type = tt_entry.type
                entry_score = tt_entry.score
                if entry_type == LOWER and entry_score >= beta:
                    return entry_score
                if entry_type == UPPER and entry_score <= alpha:
                    return entry_score
                if entry_type == EXACT:
                    return entry_score

        # If depth is less than or equal to zero, fall through to the quiescence search
        if depth <= 0:
            return self.quiescence(alpha, beta, depth, ply)

        # Check if the time limit is exceeded (at most once per TIME_CHECK_INTERVAL nodes)
        if self._time_expired():
            raise SearchStoppedException

        # Endgame is defined as positions with only kings or pawns for the side to move
        is_endgame = not (self.position.player_occ[self.position.colour]
                          & ~self.position.piece_bb[(self.position.colour << 3) | KING]
                          & ~self.position.piece_bb[(self.position.colour << 3) | PAWN])

        in_check = bool(self.position.is_in_check())

        # Null move pruning (skipped when the previous move was itself a null move)
        if not in_check and not is_endgame and not is_pv_node and self.position.undo_info[-1]['move']:
            depth_reduction = 2
            self.position.make_null_move()
            null_score = -self.pvs(-beta, -beta + 1, depth - depth_reduction - 1, ply + 1)
            self.position.undo_null_move()
            if null_score >= beta:
                return null_score
            if null_score <= -MATE:
                # Mate threat extension
                depth += 1

        best_score = -INFINITY
        old_alpha = alpha
        move_count = 0

        if in_check:
            moves = self.search_moves(EVASIONS, hash_move)
        else:
            moves = self.search_moves(ALL, hash_move, self.killers[ply][:])

        for move in moves:
            move_count += 1
            # A move is a capture when its destination square is occupied
            # (evaluated before the move is made).
            is_capture = bool((1 << (move & 0x3F)) & self.position.occupancy)

            self.position.make_move(move)

            if move_count == 1:
                # Search first move (PV-move) with full window
                score = -self.pvs(-beta, -alpha, depth - 1, ply + 1)
            else:
                # Late move reductions
                # NOTE(review): gives_check(move) is called after make_move(move);
                # confirm that it expects the post-move position.
                if (move_count > 3 and not in_check and not is_capture and not is_endgame
                        and not self.position.gives_check(move)
                        and move & (0x3 << 14) != PROMOTION
                        and move & (0x3 << 14) != CASTLING):
                    depth_reduction = 1
                    score = -self.pvs(-alpha - 1, -alpha, depth - depth_reduction - 1, ply + 1)
                else:
                    score = alpha + 1  # Only to trigger re-search with full depth

                if score > alpha:
                    # Search non-PV moves with null window
                    score = -self.pvs(-alpha - 1, -alpha, depth - 1, ply + 1)
                    if alpha < score < beta:
                        # Re-search with full window if better move found
                        score = -self.pvs(-beta, -alpha, depth - 1, ply + 1)

            self.position.undo_move()

            if score > best_score:
                if score > alpha:
                    if score >= beta:
                        if not is_capture and move & (0x3 << 14) != PROMOTION:
                            # Store killer move
                            if move != self.killers[ply][0]:
                                self.killers[ply][1] = self.killers[ply][0]
                                self.killers[ply][0] = move
                            # Increase score of move in history table
                            self.history[self.position.colour][(move >> 6) & 0x3F][move & 0x3F] += depth * depth
                        self.tt_store(tt_index, self.position.zobrist, move, depth, score, LOWER)
                        return score
                    alpha = score
                    best_move = move
                best_score = score

        if move_count == 0:
            if in_check:
                return -MATE - depth  # Checkmate
            else:
                return DRAW  # Stalemate

        # best_move is always bound in the else branch: best_score can only
        # exceed old_alpha through a move that raised alpha.
        if best_score <= old_alpha:
            self.tt_store(tt_index, self.position.zobrist, None, depth, best_score, UPPER)
        else:
            self.tt_store(tt_index, self.position.zobrist, best_move, depth, best_score, EXACT)

        return best_score

    def quiescence(self, alpha, beta, depth, ply):
        """Search check evasions or captures only and return the score.

        When not in check the static evaluation serves as the initial best
        score, and captures with a negative SEE are skipped. `depth` keeps
        decreasing below zero and is only used in the checkmate score.
        """
        self.node_count += 1

        # Fifty-move rule
        if self.position.halfmove_clock >= 100:
            return DRAW

        if self.position.is_in_check():
            moves = self.search_moves(EVASIONS)
            best_score = -INFINITY
            in_check = True
        else:
            moves = self.search_moves(CAPTURES)
            in_check = False
            # Static evaluation
            static_eval = self.eval.evaluate(self.position)
            if static_eval > alpha:
                if static_eval >= beta:
                    return static_eval
                alpha = static_eval
            best_score = static_eval

        move_count = 0
        for move in moves:
            move_count += 1

            # SEE pruning when not in check
            if not in_check and self.position.see((move >> 6) & 0x3F, move & 0x3F) < 0:
                continue

            self.position.make_move(move)
            if move_count == 1:
                score = -self.quiescence(-beta, -alpha, depth - 1, ply + 1)
            else:
                score = -self.quiescence(-alpha - 1, -alpha, depth - 1, ply + 1)
                if alpha < score < beta:
                    score = -self.quiescence(-beta, -alpha, depth - 1, ply + 1)
            self.position.undo_move()

            if score > best_score:
                if score > alpha:
                    if score >= beta:
                        return score
                    alpha = score
                best_score = score

        if in_check and move_count == 0:
            return -MATE - depth  # Checkmate

        return best_score

    # Wrap search algorithm in iterative deepening structure
    def iter_search(self, max_depth=math.inf, time_limit=math.inf):
        """Search depth 1, 2, ... until `max_depth` or `time_limit` is reached.

        Returns the move stored in the transposition table for the root
        position and prints a short summary.

        Raises:
            Exception: if the root position has no matching transposition
                table entry once the loop ends.
        """
        self.node_count = 0
        self._next_time_check = self.TIME_CHECK_INTERVAL
        self.start_time = time.time()
        self.time_limit = time_limit
        depth = 0

        # Clear killer moves
        for ply in self.killers:
            ply[0] = None
            ply[1] = None

        while depth < max_depth and time.time() - self.start_time < self.time_limit:
            depth += 1
            # An aborted search leaves moves made on the position, so keep a
            # copy to restore from.
            current_pos = copy.deepcopy(self.position)
            try:
                self.pvs(-INFINITY, INFINITY, depth)
            except SearchStoppedException:
                # Time expired
                self.position = current_pos
                break

        # Retrieve best move from transposition table
        tt_entry = self.tt[self.position.zobrist & 0xFFFF]
        if tt_entry:
            if tt_entry.zobrist == self.position.zobrist:
                tt_move = tt_entry.move
                tt_depth = tt_entry.depth
                tt_score = tt_entry.score
            else:
                raise Exception("Transposition table entry does not match current zobrist")
        else:
            raise Exception("No transposition table entry for current position")

        print("{} found move {} with depth {}, score of {}".format(
            "Black" if self.position.colour else "White",
            self.position.move_to_san(tt_move), tt_depth, tt_score))
        print("Searched {} nodes".format(self.node_count))
        print("Time taken: {:0.2f}s".format(time.time() - self.start_time))
        print()

        return tt_move

    # Perft function used for debugging
    def perft(self, depth):
        """Count the leaf nodes reachable in exactly `depth` legal plies."""
        node_count = 0
        if depth == 0:
            return 1

        if self.position.is_in_check():
            in_check = True
            moves = self.position.get_check_evasions(self.position.colour)
        else:
            in_check = False
            moves = self.position.get_pseudo_legal_moves()

        for move in moves:
            # Evasions are used as generated; pseudo-legal moves are filtered.
            if not in_check:
                if not self.position.is_legal(move):
                    continue
            self.position.make_move(move)
            child_nodes = self.perft(depth - 1)
            node_count += child_nodes
            self.position.undo_move()

        return node_count
parser.add_argument('--wgan', action='store_true', help=' use wgan gp or not')

# eval options
#parser.add_argument('--results_dir', type=str, default='../results/', help='saves results here.')
#parser.add_argument('--num_test', type=int, default=50, help='how many test images to run')
#parser.add_argument('--n_samples', type=int, default=5, help='#samples')
#parser.add_argument('--no_encode', action='store_true', help='do not produce encoded image')
#parser.add_argument('--sync', action='store_true', help='use the same latent code for different input images')
#parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio for the results')
parser.add_argument('--phase', type=str, default='train', help='train/eval')

args = parser.parse_args()

if __name__ == '__main__':
    # Each run gets its own sub-directory for checkpoints, logs and samples.
    # exist_ok=True replaces the separate exists() test, so there is no gap
    # between checking and creating in which another process could create
    # the directory and make makedirs() fail.
    for base_dir in (args.ckpt_dir, args.log_dir, args.sample_dir):
        os.makedirs(os.path.join(base_dir, args.run_name), exist_ok=True)

    # Any phase other than 'train' or 'eval' does nothing.
    if args.phase == 'train':
        trainer = Train(args)
        trainer.train()
    elif args.phase == 'eval':
        evaluater = Evaluate(args)
        evaluater.evaluate()
import sys
from collections import deque

import numpy as np
import keras

from evaluate import Evaluate

# Number of past values fed to the model for one prediction.
WINDOW_SIZE = 30

# Initialize model
model = keras.models.load_model("model1")

# Sliding window over the most recent WINDOW_SIZE values; once full, the
# deque drops its oldest element on every append.
val_arr = deque(maxlen=WINDOW_SIZE)

# Number of data rows processed so far.
i = 0
for line in sys.stdin:
    # A blank line (e.g. a trailing newline) carries no data and would make
    # float('') raise, so skip it without counting it.
    if not line.strip():
        continue

    row = np.array([float(x.strip()) for x in line.split(',')])
    # Fourth column of the row. NOTE(review): the original comments called
    # this both the "open value from previous day" and the "prev close
    # price"; confirm which column it really is.
    val = row[3]
    val_arr.append(val)

    if i < WINDOW_SIZE:
        # No prediction is made for the first WINDOW_SIZE rows.
        print("HOLD", 0)
    else:
        x_input = np.array(val_arr)
        # Reshape the window to (1, WINDOW_SIZE, 1) for model.predict.
        x_input = np.reshape(x_input, (-1, x_input.shape[0], 1))
        predicted_price = model.predict(x_input)
        # Send the predicted price and the current row's fourth column.
        choice, frac = Evaluate.evaluate(predicted_price, row[3])
        print(choice, frac)  # e.g. HOLD 0.5
    i += 1
elif sentence[cur:tail] in self._WordDict: ParseList.append(sentence[cur:tail]) tail = cur cur = tail - span if cur < 0: cur = 0 else: cur += 1 ParseList.reverse() return ParseList if __name__ == '__main__': E = Evaluate() p = PrePostNgram() p.Training() p.SeparWords('Pre') print('*****') print('Pre Max') E.evaluate() print('*****') p.SeparWords('Post') print('*****') print('Post Max') E.evaluate() print('*****') p.SeparWords('prepostBigram') print('*****') print('PrePostSegBigram Max') E.evaluate()