Пример #1
0
    def get(self, use_db, model_type, symbol, unit, count, period, partition,
            delta, seed, trees, jobs, longs):
        """Train a model on market data and report its next-trend prediction.

        Args:
            use_db: When truthy, every row of ``Data.query.all()`` is
                converted with ``query_to_dict`` and the resulting list is
                passed to the market as ``json``; otherwise ``None`` is
                passed.
            model_type: Model name; lower-cased before it is handed to
                ``market.setup_model``.
            symbol: Forwarded to ``market.Market``.
            unit: Forwarded to ``market.Market``.
            count: Forwarded to ``market.Market``.
            period: Forwarded to ``market.Market``.
            partition: Forwarded to ``set_features`` and
                ``set_long_features``.
            delta: Forwarded to ``market.set_targets``.
            seed: Forwarded to ``market.setup_model``.
            trees: Forwarded to ``market.setup_model`` as ``n_estimators``.
            jobs: Forwarded to ``market.setup_model`` as ``n_jobs``.
            longs: Forwarded to ``set_long_features`` as ``columns_to_set``.

        Returns:
            A dict with three keys: ``'trend'`` (the name of the trend
            predicted for the last feature row), ``'test_set_accuracy'``
            (the model's accuracy on its own test split) and
            ``'probabilities'`` (a mapping from each target name to the
            probability predicted for it on the last feature row).
        """
        # Named `market_json` rather than `json` so the local cannot be
        # confused with (or shadow) the standard-library module.
        market_json = None
        if use_db:

            # NOTE(review): presumably `validate_db` checks the database
            # before the query runs -- confirm against its definition.
            @validate_db(db)
            def get_queries():
                return [query_to_dict(q) for q in Data.query.all()]

            market_json = get_queries()

        m = market.Market(json=market_json,
                          symbol=symbol,
                          unit=unit,
                          count=count,
                          period=period)

        features = m.set_features(partition=partition)
        features = m.set_long_features(features,
                                       columns_to_set=longs,
                                       partition=partition)

        # Targets are derived while 'close' is still present; the column is
        # dropped from the features only afterwards.
        targets = market.set_targets(features, delta=delta)
        features = features.drop(['close'], axis=1)

        # The last feature row is held out of training ...
        model = market.setup_model(features[:-1],
                                   targets,
                                   model_type=model_type.lower(),
                                   seed=seed,
                                   n_estimators=trees,
                                   n_jobs=jobs)

        # ... because it is the entry whose trend gets predicted.
        next_date = features.tail(1)

        trend = market.target_code_to_name(model._predict_trends(next_date)[0])
        accuracy = model.accuracy(model.features.test, model.targets.test)
        proba = model._predict_probas(next_date)

        return {
            'trend': trend,
            'test_set_accuracy': accuracy,
            'probabilities': {
                market.target_code_to_name(code): p
                for code, p in enumerate(proba[0])
            }
        }
Пример #2
0
def main():
    """Train a model from command-line options and print its prediction.

    Reads options via ``get_args()``, builds the feature table (optionally
    widened with 'long_'-prefixed columns computed over twice the
    partition), trains a model on every row but the last, and prints the
    test-set accuracy and the trend predicted for that last row.
    Probabilities and log probabilities are printed only when
    ``args.proba`` / ``args.proba_log`` are set.

    Raises:
        AssertionError: If ``args.partition`` is not greater than zero.
    """
    args = get_args()
    # Raised explicitly instead of via `assert`, which is stripped when
    # Python runs with -O and would silently skip this validation.
    if not (args.partition > 0):
        raise AssertionError('The data must be partitioned!')

    m = market.Market(symbol=args.symbol,
                      unit=args.unit,
                      count=args.count,
                      period=args.period)
    x = m.features(partition=args.partition)
    if args.long is not None:
        # Create long features DataFrame
        x_long = m.features(partition=2 * args.partition)

        # Remove features not specified by args.long
        unwanted_features = [f for f in x.columns if f not in args.long]
        x_long = x_long.drop(unwanted_features, axis=1)

        # Prefix long columns with 'long_' to fix naming conflicts
        x_long.columns = ['long_{0}'.format(f) for f in x_long.columns]

        # Merge the two DataFrames column-wise.  The first `skip` short
        # rows are discarded and the index is reset before concatenating.
        # NOTE(review): presumably this lines the short rows up with the
        # long rows -- confirm against what `m.features` returns.
        skip = args.partition
        x = pd.concat([x[skip:].reset_index(drop=True), x_long], axis=1)

    # Targets are derived while 'close' is still a column; it is dropped
    # from the features only afterwards.
    y = market.targets(x, delta=args.delta)
    x = x.drop(['close'], axis=1)

    model = market.setup_model(x[:-1],
                               y,
                               model_type=args.model.lower(),
                               seed=args.seed,
                               n_estimators=args.trees,
                               n_jobs=args.jobs)

    next_date = x.tail(1)  # Remember the entry we didn't train?  Predict it.

    # TODO: Reimplement display of confusion matrix and feature importances
    acc = model.accuracy(model.features.test, model.targets.test)
    print('Test Set Accuracy: {0:.3f}%'.format(100 * acc))

    trends = model._predict_trends(next_date)
    print('Predicted Trend: {0}'.format(market.target_code_to_name(trends[0])))

    if args.proba:
        probas = model._predict_probas(next_date)
        print('Probability: {0}'.format(probas[0]))
    if args.proba_log:
        logs = model._predict_logs(next_date)
        print('Log Probability: {0}'.format(logs[0]))
Пример #3
0
    def get(self, use_db, model, symbol, unit, count, period, partition, delta,
            seed, trees, jobs, longs):
        """Train a model on market data and report its next-trend prediction.

        Args:
            use_db: Accepted for interface compatibility; see the note in
                the body -- it currently has no effect.
            model: Model name; lower-cased before it is handed to
                ``market.setup_model`` as ``model_type``.
            symbol: Forwarded to ``market.Market``.
            unit: Forwarded to ``market.Market``.
            count: Forwarded to ``market.Market``.
            period: Forwarded to ``market.Market``.
            partition: Forwarded to ``set_features`` and
                ``set_long_features``.
            delta: Forwarded to ``market.set_targets``.
            seed: Forwarded to ``market.setup_model``.
            trees: Forwarded to ``market.setup_model`` as ``n_estimators``.
            jobs: Forwarded to ``market.setup_model`` as ``n_jobs``.
            longs: Forwarded to ``set_long_features`` as ``columns_to_set``.

        Returns:
            A dict with three keys: ``"trend"`` (the name of the trend
            predicted for the last feature row), ``"test_set_accuracy"``
            (the model's accuracy on its own test split) and
            ``"probabilities"`` (a mapping from each target name to the
            probability predicted for it on the last feature row).
        """
        # NOTE(review): `use_db` does not change anything here -- no
        # database loading is implemented in this method, so the market
        # always receives json=None.
        db_json = None

        m = market.Market(json=db_json,
                          symbol=symbol,
                          unit=unit,
                          count=count,
                          period=period)

        features = m.set_features(partition=partition)
        features = m.set_long_features(features,
                                       columns_to_set=longs,
                                       partition=partition)

        # Targets are derived while 'close' is still present; the column is
        # dropped from the features only afterwards.
        targets = market.set_targets(features, delta=delta)
        features = features.drop(['close'], axis=1)

        # Kept under its own name so the `model` argument (the model-type
        # string) is not rebound to an object of a different kind.
        trained = market.setup_model(features[:-1],
                                     targets,
                                     model_type=model.lower(),
                                     seed=seed,
                                     n_estimators=trees,
                                     n_jobs=jobs)

        # The last feature row was held out of training; predict it.
        next_date = features.tail(1)

        trend = market.target_code_to_name(
            trained._predict_trends(next_date)[0])
        accuracy = trained.accuracy(trained.features.test,
                                    trained.targets.test)
        proba = trained._predict_probas(next_date)

        return {
            "trend": trend,
            "test_set_accuracy": accuracy,
            "probabilities": {
                market.target_code_to_name(code): p
                for code, p in enumerate(proba[0])
            }
        }
Пример #4
0
    def predict_coin(self, coin, unit, api, model_type):
        """Predict the next trend for ``coin`` when ``api`` is 'spec'.

        Args:
            coin: Coin name; stripped and prefixed with 'USDT_' to form the
                market symbol.
            unit: Forwarded to ``market.Market``.
            api: Only the value 'spec' is handled; for anything else the
                method returns ``None`` without doing any work.
            model_type: Forwarded to ``market.setup_model``.

        Returns:
            ``(next_date, message)`` -- the last feature row and a sentence
            naming the predicted trend -- or ``None`` when ``api`` is not
            'spec'.  The probability and log probability of the prediction
            are printed, not returned.
        """
        if api != 'spec':
            return None

        symbol = 'USDT_' + coin.strip()
        coin_market = market.Market(symbol=symbol,
                                    unit=unit,
                                    count=6,
                                    period=86400)

        # Build features and targets; 'close' is dropped from the features
        # only after the targets have been derived from them.
        features = coin_market.features(partition=14)
        targets = market.targets(features, delta=25)
        features = features.drop(['close'], axis=1)

        # Train on every row except the last one.
        model = market.setup_model(features[:-1],
                                   targets,
                                   model_type=model_type,
                                   seed=1,
                                   n_estimators=65,
                                   n_jobs=4)

        # Evaluate on the model's own test split.  The results are computed
        # but not used further in this method.
        test_trends = model._predict_trends(model.features.test)
        ftr_imps = model.feature_importances()
        conf_mx = model.confusion_matrix(model.targets.test, test_trends)
        acc = model.accuracy(model.features.test, model.targets.test)

        # Predict the held-out last row.
        next_date = features.tail(1)
        next_trends = model._predict_trends(next_date)
        trend_name = market.target_code_to_name(next_trends[0])
        message = "The market for {} is predicted to be {}!".format(
            coin, trend_name)

        next_probas = model._predict_probas(next_date)
        print('Probability: {0}'.format(next_probas[0]))

        next_logs = model._predict_logs(next_date)
        print('Log Probability: {0}'.format(next_logs[0]))

        return next_date, message
Пример #5
0
def main():
    """Train a model from command-line options and print its prediction.

    Reads options via ``get_args()``, builds the feature table (adding long
    features when ``args.long`` is given), trains a model on every row but
    the last, and prints the test-set accuracy and the trend predicted for
    that last row.  Probabilities and log probabilities are printed only
    when ``args.proba`` / ``args.proba_log`` are set.

    Raises:
        AssertionError: If ``args.partition`` is not greater than zero.
    """
    args = get_args()
    # Raised explicitly instead of via `assert`, which is stripped when
    # Python runs with -O and would silently skip this validation.
    if not (args.partition > 0):
        raise AssertionError('The data must be partitioned!')

    m = market.Market(symbol=args.symbol, unit=args.unit,
                      count=args.count, period=args.period)
    features = m.set_features(partition=args.partition)
    if args.long is not None:
        features = m.set_long_features(features,
                                       columns_to_set=args.long,
                                       partition=args.partition)

    # Targets are derived while 'close' is still a column; it is dropped
    # from the features only afterwards.
    targets = market.set_targets(features, delta=args.delta)
    features = features.drop(['close'], axis=1)

    model = market.setup_model(features[:-1], targets,
                               model_type=args.model.lower(),
                               seed=args.seed,
                               n_estimators=args.trees,
                               n_jobs=args.jobs)

    # Remember the entry we didn't train?  Predict it.
    next_date = features.tail(1)

    # TODO: Reimplement display of confusion matrix and feature importances
    acc = model.accuracy(model.features.test, model.targets.test)
    print('Test Set Accuracy: {0:.3f}%'.format(100 * acc))

    trends = model._predict_trends(next_date)
    print('Predicted Trend: {0}'.format(market.target_code_to_name(trends[0])))

    if args.proba:
        probas = model._predict_probas(next_date)
        print('Probability: {0}'.format(probas[0]))
    if args.proba_log:
        logs = model._predict_logs(next_date)
        print('Log Probability: {0}'.format(logs[0]))
Пример #6
0
from speculator import market

# Load the raw market data for the USDT_BTC symbol
m = market.Market(
    symbol='USDT_BTC',
    unit='month',
    count=6,
    period=86400,
)

# Features (x axis) come from the market; targets (y axis) come from the
# features
x = m.features(partition=14)
y = market.targets(x, delta=25)

# Fit a random forest.  The final feature row has no target yet (it can't
# be predicted so far), so it is excluded from training.
model = market.setup_model(
    x[:-1],
    y,
    model_type='random_forest',
    seed=1,
    n_estimators=65,
    n_jobs=4,
)

# Run the model over its own test-split features
pred = model.predict(model.features.test)

# Collect the evaluation figures for that test split
ftr_imps = model.feature_importances()
conf_mx = model.confusion_matrix(model.targets.test, pred)
acc = model.accuracy(model.targets.test, pred)

# Banner that opens the test-set report
print('##################',
      '# TEST SET       #',
      '##################',
      sep='\n')
Пример #7
0
def main():
    """Train a model from command-line options and print a full report.

    Reads options via ``get_args()``, builds the feature table (optionally
    widened with 'long_'-prefixed columns computed over twice the
    partition), and trains a model on every row but the last.  It then
    prints the test-set accuracy, confusion matrix and feature
    importances, followed by the trend predicted for the held-out last
    row.  Probabilities and log probabilities are printed only when
    ``args.proba`` / ``args.proba_log`` are set.
    """
    opts = get_args()

    mkt = market.Market(symbol=opts.symbol,
                        unit=opts.unit,
                        count=opts.count,
                        period=opts.period)
    features = mkt.features(partition=opts.partition)

    if opts.long is not None:
        # Long features use a partition twice as large as the short ones.
        long_features = mkt.features(partition=2 * opts.partition)

        # Keep only the columns requested through opts.long.
        not_requested = [
            col for col in features.columns if col not in opts.long
        ]
        long_features = long_features.drop(not_requested, axis=1)

        # The 'long_' prefix avoids name clashes with the short columns.
        long_features.columns = [
            'long_{0}'.format(col) for col in long_features.columns
        ]

        # Discard the first `partition` short rows, reset the index, then
        # join the two tables side by side.
        short_tail = features[opts.partition:].reset_index(drop=True)
        features = pd.concat([short_tail, long_features], axis=1)

    # Targets need 'close'; the column leaves the features right after.
    targets = market.targets(features, delta=opts.delta)
    features = features.drop(['close'], axis=1)
    model = market.setup_model(features[:-1],
                               targets,
                               model_type=opts.model,
                               seed=opts.seed,
                               n_estimators=opts.trees,
                               n_jobs=opts.jobs)

    # Evaluate on the model's own test split.
    test_pred = model.predict(model.features.test)
    importances = model.feature_importances()
    confusion = model.confusion_matrix(model.targets.test, test_pred)
    accuracy = model.accuracy(model.targets.test, test_pred)

    # Test-set report
    for banner_line in ('##################',
                        '# TEST SET       #',
                        '##################'):
        print(banner_line)
    print('Accuracy: {0:.3f}%'.format(100 * accuracy))
    print('\nConfusion Matrix:')
    print(confusion)
    print(market.TARGET_CODES)
    print('\nFeature Importance:')
    for feature_name, importance in importances:
        print('  {0}: {1:.3f}%'.format(feature_name, 100 * importance))

    print()

    # Prediction for the row that was held out of training
    for banner_line in ('##################',
                        '# PREDICTED NEXT #',
                        '##################'):
        print(banner_line)
    next_date = features.tail(1)
    predicted_code = model.predict(next_date)[0]
    print('Trend: {0}'.format(market.target_code_to_name(predicted_code)))
    if opts.proba:
        print('Probability: {0}'.format(model.predict_proba(next_date)))
    if opts.proba_log:
        print('Log Probability: {0}'.format(
            model.predict_log_proba(next_date)))