Пример #1
0
def train(start, end, label, label_values, model_dir, receipt):
    """Build train/evaluate CSVs for a date window and train the match model.

    Args:
        start: window start date string, formatted ``%d-%m-%Y``.
        end: window end date string, formatted ``%d-%m-%Y``.
        label: name of the label column to predict.
        label_values: collection of permitted label values.
        model_dir: model directory key used for config lookup and output paths.
        receipt: optional receipt id; when not ``None`` a completion receipt
            is posted after training.
    """
    train_path = create_train_path()

    start_date = datetime.strptime(start, '%d-%m-%Y')
    end_date = datetime.strptime(end, '%d-%m-%Y')

    # Evaluation window runs one year past the requested end date.
    # NOTE(review): computed BEFORE end_date is clamped to "now" below —
    # presumably intentional so the hold-out window keeps its original span;
    # confirm with the caller.
    next_date = end_date + pd.DateOffset(years=1)

    # Never request training data from the future.
    if end_date > datetime.now():
        end_date = datetime.now()

    learning_cfg = get_learning_cfg(model_dir)

    train_filename = "train-players-" + start_date.strftime(
        "%d-%m-%Y") + '-' + end_date.strftime("%d-%m-%Y") + ".csv"
    evaluate_filename = "train-players-" + end_date.strftime(
        "%d-%m-%Y") + '-' + next_date.strftime("%d-%m-%Y") + ".csv"
    train_file_path = local_dir + train_path + train_filename
    evaluate_file_path = local_dir + train_path + evaluate_filename

    has_data = model_utils.create_csv(url=model_utils.EVENT_MODEL_URL,
                                      filename=train_file_path,
                                      start_date=start_date,
                                      end_date=end_date,
                                      aws_path=train_path)

    if learning_cfg['evaluate']:

        has_test_data = model_utils.create_csv(url=model_utils.EVENT_MODEL_URL,
                                               filename=evaluate_file_path,
                                               start_date=end_date,
                                               end_date=next_date,
                                               aws_path=train_path)

        # Fix: use truthiness instead of `== True` / `== False` comparisons.
        if has_data and not has_test_data:
            # Training data exists but no hold-out data — skip evaluation.
            evaluate_filename = None
        else:
            logger.info('we can evaluate')

    if has_data:

        # Prefix with the AWS train path so the model layer can locate the files.
        train_filename = train_path + train_filename
        if evaluate_filename is not None:
            evaluate_filename = train_path + evaluate_filename

        match_model.create(train=True,
                           label=label,
                           label_values=label_values,
                           model_dir=model_dir,
                           train_filename=train_filename,
                           test_filename=evaluate_filename,
                           init=True)
    else:
        logger.info('no data to train')

    if receipt is not None:
        receipt_service.put_receipt(receipt_service.TRAIN_RECEIPT_URL, receipt,
                                    None)
Пример #2
0
def create(country, train, label, label_values, model_dir, train_filename,
           test_filename):
    """Build the per-country match classifier and optionally train it.

    The classifier is constructed from the team vocabulary for *country*.
    When ``train`` is truthy it is fitted on ``train_filename`` and, if the
    learning config enables evaluation and ``test_filename`` is supplied,
    evaluated against the hold-out file before working files are tidied up.

    Returns the classifier.
    """
    logger.info('create match model called')
    cfg = get_learning_cfg(model_dir)

    remote_dir = 'models/' + model_dir + '/' + country
    model_path = local_dir + '/' + remote_dir

    logger.info('creating vocab')

    vocab_file = vocab_service.create_vocab(filename=vocab_service.TEAMS_FILE,
                                            country=country)

    columns = match_featureset.create_feature_columns(team_vocab=vocab_file)

    # Build 2 hidden layer DNN with 10, 10 units respectively.  (from example will enrich at some point).
    estimator = classifier_utils.create(feature_columns=columns,
                                        classes=len(label_values),
                                        model_dir=remote_dir,
                                        learning_cfg=cfg)

    # Guard clause: nothing more to do unless training was requested.
    if not train:
        return estimator

    logger.info('training started')

    # Decide once whether we are in evaluate mode; reused below.
    evaluating = cfg['evaluate'] and test_filename is not None

    if evaluating:
        logger.info('load dataset - evaluate mode')
        (train_x, train_y), (test_x, test_y) = match_dataset.load_data(
            train_path=local_dir + train_filename,
            test_path=local_dir + test_filename,
            y_name=label,
            convert=label_values)
    else:
        logger.info('load dataset - normal mode')
        (train_x, train_y) = match_dataset.load_train_data(
            train_path=local_dir + train_filename,
            y_name=label,
            convert=label_values)

    # Train the Model.
    logger.info('training the model')
    estimator.train(
        input_fn=lambda: dataset_utils.train_input_fn(train_x, train_y,
                                                      cfg['batch_size']),
        steps=cfg['steps'])

    if evaluating:
        logger.info('evaluate')
        # Evaluate the model.   not much use anymore.  but could use the first test file.  makes sense
        eval_result = estimator.evaluate(
            input_fn=lambda: dataset_utils.eval_input_fn(test_x, test_y,
                                                         cfg['batch_size']))

        logger.info(
            '\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

    tidy_up(tf_models_dir=model_path,
            aws_model_dir=remote_dir,
            team_file=vocab_file,
            train_filename=train_filename)

    return estimator
Пример #3
0
def create(train, label, label_values, model_dir, train_filename,
           test_filename, init):
    """Build the player/team match classifier and optionally train it.

    Args:
        train: when truthy, fit the classifier on ``train_filename``.
        label: name of the label column to predict.
        label_values: collection of permitted label values.
        model_dir: model directory key used for config and output paths.
        train_filename: training CSV path relative to ``local_dir``.
        test_filename: optional hold-out CSV path; evaluation runs only when
            this is not ``None`` and the learning config enables it.
        init: when truthy, tidy up working files after training.

    Returns:
        The (possibly trained) classifier.
    """
    aws_model_dir = 'models/' + model_dir
    tf_models_dir = local_dir + '/' + aws_model_dir

    learning_cfg = get_learning_cfg(model_dir)

    logger.info(learning_cfg)

    logger.info('team vocab started...')
    team_file = vocab_service.create_vocab(filename=vocab_service.TEAMS_FILE)
    logger.info('team vocab completed')

    logger.info('player vocab started...')
    player_file = vocab_service.create_vocab(
        filename=vocab_service.PLAYERS_FILE)
    # Fix: stray '[' removed from the log message below.
    logger.info('player vocab completed')

    # and the other numerics.  they will be read from a CSV / or direct from mongo more likely.  yes.  from mongo.
    # and review checkpoints, to only train with the newest data?  or build from scratch.  lets see.
    # need to add the label field too.

    feature_columns = match_featureset.create_feature_columns(
        team_vocab=team_file, player_vocab=player_file)

    # Build 2 hidden layer DNN with 10, 10 units respectively.  (from example will enrich at some point).
    classifier = classifier_utils.create(feature_columns=feature_columns,
                                         classes=len(label_values),
                                         model_dir=aws_model_dir,
                                         learning_cfg=learning_cfg,
                                         init=init)

    if train:

        logger.info(label_values)

        if learning_cfg['evaluate'] and test_filename is not None:
            (train_x, train_y), (test_x, test_y) = match_dataset.load_data(
                train_path=local_dir + train_filename,
                test_path=local_dir + test_filename,
                y_name=label,
                convert=label_values)

        else:
            (train_x, train_y) = match_dataset.load_train_data(
                train_path=local_dir + train_filename,
                y_name=label,
                convert=label_values)

        # Train the Model.
        classifier.train(input_fn=lambda: dataset_utils.train_input_fn(
            train_x, train_y, learning_cfg['batch_size']),
                         steps=learning_cfg['steps'])

        if learning_cfg['evaluate'] and test_filename is not None:
            # Evaluate the model.   not much use anymore.  but could use the first test file.  makes sense
            eval_result = classifier.evaluate(
                input_fn=lambda: dataset_utils.eval_input_fn(
                    test_x, test_y, learning_cfg['batch_size']))

            logger.info(
                '\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

        if init:
            logger.info('tidying up')
            tidy_up(tf_models_dir=tf_models_dir,
                    aws_model_dir=aws_model_dir,
                    train_filename=train_filename)

            # NOTE(review): presumably gives the tidy-up/upload time to settle
            # before callers proceed — confirm whether this delay is still needed.
            time.sleep(30)

    return classifier
from service.config_service import get_learning_cfg

# Usage example: load the learning configuration for the 'match_goals' model.
# NOTE(review): this call runs at import time as a module-level side effect.
get_learning_cfg('match_goals')