def train(start, end, label, label_values, model_dir, receipt):
    train_path = create_train_path()
    start_date = datetime.strptime(start, '%d-%m-%Y')
    end_date = datetime.strptime(end, '%d-%m-%Y')
    next_date = end_date + pd.DateOffset(years=1)
    # Never train on dates in the future.
    if end_date > datetime.now():
        end_date = datetime.now()

    learning_cfg = get_learning_cfg(model_dir)

    train_filename = ("train-players-" + start_date.strftime("%d-%m-%Y")
                      + '-' + end_date.strftime("%d-%m-%Y") + ".csv")
    evaluate_filename = ("train-players-" + end_date.strftime("%d-%m-%Y")
                         + '-' + next_date.strftime("%d-%m-%Y") + ".csv")
    train_file_path = local_dir + train_path + train_filename
    evaluate_file_path = local_dir + train_path + evaluate_filename

    # Export the training window to CSV; the year that follows it becomes the evaluation window.
    has_data = model_utils.create_csv(url=model_utils.EVENT_MODEL_URL,
                                      filename=train_file_path,
                                      start_date=start_date,
                                      end_date=end_date,
                                      aws_path=train_path)
    if learning_cfg['evaluate']:
        has_test_data = model_utils.create_csv(url=model_utils.EVENT_MODEL_URL,
                                               filename=evaluate_file_path,
                                               start_date=end_date,
                                               end_date=next_date,
                                               aws_path=train_path)
        if has_data and not has_test_data:
            evaluate_filename = None
        else:
            logger.info('we can evaluate')

    if has_data:
        train_filename = train_path + train_filename
        if evaluate_filename is not None:
            evaluate_filename = train_path + evaluate_filename
        match_model.create(train=True,
                           label=label,
                           label_values=label_values,
                           model_dir=model_dir,
                           train_filename=train_filename,
                           test_filename=evaluate_filename,
                           init=True)
    else:
        logger.info('no data to train')

    if receipt is not None:
        receipt_service.put_receipt(receipt_service.TRAIN_RECEIPT_URL, receipt, None)
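A minimal sketch of how train might be invoked. Only 'match_goals' appears as a model_dir elsewhere in this section; the dates, label name, and label values below are placeholders, not values taken from the project.

# Hypothetical invocation: train on one season of data, with the following
# year becoming the evaluation window. Label name and values are placeholders.
train(start='01-08-2017',
      end='01-08-2018',
      label='result',
      label_values=['home_win', 'draw', 'away_win'],
      model_dir='match_goals',
      receipt=None)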
def create(country, train, label, label_values, model_dir, train_filename, test_filename):
    logger.info('create match model called')
    learning_cfg = get_learning_cfg(model_dir)
    aws_model_dir = 'models/' + model_dir + '/' + country
    tf_models_dir = local_dir + '/' + aws_model_dir

    logger.info('creating vocab')
    team_file = vocab_service.create_vocab(filename=vocab_service.TEAMS_FILE,
                                           country=country)
    feature_columns = match_featureset.create_feature_columns(team_vocab=team_file)

    # Build a two-hidden-layer DNN (10, 10 units), taken from the TensorFlow example;
    # the architecture will be enriched later.
    classifier = classifier_utils.create(feature_columns=feature_columns,
                                         classes=len(label_values),
                                         model_dir=aws_model_dir,
                                         learning_cfg=learning_cfg)

    if train:
        logger.info('training started')
        if learning_cfg['evaluate'] and test_filename is not None:
            logger.info('load dataset - evaluate mode')
            (train_x, train_y), (test_x, test_y) = match_dataset.load_data(
                train_path=local_dir + train_filename,
                test_path=local_dir + test_filename,
                y_name=label,
                convert=label_values)
        else:
            logger.info('load dataset - normal mode')
            (train_x, train_y) = match_dataset.load_train_data(
                train_path=local_dir + train_filename,
                y_name=label,
                convert=label_values)

        # Train the model.
        logger.info('training the model')
        classifier.train(input_fn=lambda: dataset_utils.train_input_fn(
            train_x, train_y, learning_cfg['batch_size']),
            steps=learning_cfg['steps'])

        if learning_cfg['evaluate'] and test_filename is not None:
            logger.info('evaluate')
            # Evaluate the model against the held-out test file.
            eval_result = classifier.evaluate(
                input_fn=lambda: dataset_utils.eval_input_fn(
                    test_x, test_y, learning_cfg['batch_size']))
            logger.info(
                '\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

        tidy_up(tf_models_dir=tf_models_dir,
                aws_model_dir=aws_model_dir,
                team_file=team_file,
                train_filename=train_filename)

    return classifier
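For contrast with the player-level create further down, a hypothetical call to this per-country variant; note it takes a country and no init flag. All argument values here are placeholders.

# Hypothetical invocation of the per-country match model create.
classifier = create(country='england',
                    train=True,
                    label='result',
                    label_values=['home_win', 'draw', 'away_win'],
                    model_dir='match_result',
                    train_filename='train/train-england.csv',
                    test_filename=None)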
def create(train, label, label_values, model_dir, train_filename, test_filename, init):
    aws_model_dir = 'models/' + model_dir
    tf_models_dir = local_dir + '/' + aws_model_dir
    learning_cfg = get_learning_cfg(model_dir)
    logger.info(learning_cfg)

    logger.info('team vocab started...')
    team_file = vocab_service.create_vocab(filename=vocab_service.TEAMS_FILE)
    logger.info('team vocab completed')

    logger.info('player vocab started...')
    player_file = vocab_service.create_vocab(filename=vocab_service.PLAYERS_FILE)
    logger.info('player vocab completed')

    # TODO: add the remaining numeric features (most likely read directly from mongo
    # rather than CSV), review checkpoints so we can either train only on the newest
    # data or rebuild from scratch, and add the label field.
    feature_columns = match_featureset.create_feature_columns(
        team_vocab=team_file, player_vocab=player_file)

    # Build a two-hidden-layer DNN (10, 10 units), taken from the TensorFlow example;
    # the architecture will be enriched later.
    classifier = classifier_utils.create(feature_columns=feature_columns,
                                         classes=len(label_values),
                                         model_dir=aws_model_dir,
                                         learning_cfg=learning_cfg,
                                         init=init)

    if train:
        logger.info(label_values)
        if learning_cfg['evaluate'] and test_filename is not None:
            (train_x, train_y), (test_x, test_y) = match_dataset.load_data(
                train_path=local_dir + train_filename,
                test_path=local_dir + test_filename,
                y_name=label,
                convert=label_values)
        else:
            (train_x, train_y) = match_dataset.load_train_data(
                train_path=local_dir + train_filename,
                y_name=label,
                convert=label_values)

        # Train the model.
        classifier.train(input_fn=lambda: dataset_utils.train_input_fn(
            train_x, train_y, learning_cfg['batch_size']),
            steps=learning_cfg['steps'])

        if learning_cfg['evaluate'] and test_filename is not None:
            # Evaluate the model against the held-out test file.
            eval_result = classifier.evaluate(
                input_fn=lambda: dataset_utils.eval_input_fn(
                    test_x, test_y, learning_cfg['batch_size']))
            logger.info(
                '\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

        if init:
            logger.info('tidying up')
            tidy_up(tf_models_dir=tf_models_dir,
                    aws_model_dir=aws_model_dir,
                    train_filename=train_filename)
            time.sleep(30)

    return classifier
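The input_fn lambdas above delegate to dataset_utils, which is not shown in this section. For orientation only, the TensorFlow premade-estimator example that the two-hidden-layer DNN comment refers to defines these functions roughly as below; the project's own dataset_utils may differ.

import tensorflow as tf

def train_input_fn(features, labels, batch_size):
    # Input function for training: shuffle, repeat, and batch the examples.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    return dataset.shuffle(1000).repeat().batch(batch_size)

def eval_input_fn(features, labels, batch_size):
    # Input function for evaluation or prediction: batch without shuffling.
    features = dict(features)
    inputs = features if labels is None else (features, labels)
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    return dataset.batch(batch_size)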
from service.config_service import get_learning_cfg

get_learning_cfg('match_goals')
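Based on the keys read in the functions above ('evaluate', 'batch_size', 'steps'), the returned config is expected to look something like the sketch below; the values shown are illustrative, not the project's actual configuration.

# Illustrative shape of the dict returned by get_learning_cfg.
learning_cfg = {
    'evaluate': True,    # whether to build and score a held-out test set
    'batch_size': 100,   # batch size passed to the input functions
    'steps': 1000,       # training steps passed to classifier.train
}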