Exemplo n.º 1
0
def main(debug):
    """Train the Q table of every currency in ever-longer rounds.

    A single interval is drawn from INTERVALS up front and reused for the
    whole run.  Round ``n`` gives each entry of CURRENCIES a time budget of
    ``n`` minutes.  While the budget lasts, each epoch calls ``train`` on a
    randomly chosen tail of the last 2000 rows of the loaded data, and
    ``alpha``/``epsilon`` are re-derived from the running win ratio.  A
    summary line is logged every quarter of the budget, and the Q table is
    saved once the currency's budget is used up.

    Args:
        debug: When truthy, run a single epoch for a single currency, log
            the summary, save the Q table and return instead of looping
            forever.
    """
    interval = choice(INTERVALS)

    minutes = 0
    while True:
        minutes += 1
        seconds_to_run = 60 * minutes
        seconds_info_intervals = seconds_to_run / 4
        # Cap on the length of the per-epoch histories; grows with the budget.
        history_limit = 1000 + minutes * 1000
        logging.error('Training each currency for {0} minutes'.format(minutes))

        # .items() exists on both Python 2 and 3 dicts; .iteritems() does not.
        for currency, min_trail in CURRENCIES.items():
            pip_mul = 100. if 'JPY' in currency else 10000.
            actions = calculateActions(min_trail)

            # Only the last 2000 rows are used for training.
            df = loadData(currency, interval)
            df = df[-2000:]
            df = getBackgroundKnowledge(df, PERIODS)

            alpha = 0
            epsilon = 0
            q = loadQ(currency, interval)

            time_start = time()
            # Elapsed-seconds threshold for the next summary; 0 means the
            # very first epoch always reports.
            time_interval = 0

            epoch = 0
            rewards = []
            errors = []
            ticks = []
            logging.warning('Training {0} on {1} with {2} ticks...'.format(
                currency, interval, len(df)))
            while True:
                epoch += 1
                logging.info(' ')
                logging.info('{0}'.format('=' * 20))
                logging.info('EPOCH {0}'.format(epoch))

                # Each epoch trains on a random tail of the frame.
                index_start = randint(0, len(df) - 20)
                df_inner = df.iloc[index_start:]
                logging.info('Epoch: at {0} with {1} ticks'.format(
                    index_start, len(df_inner)))
                q, r, error, tick = train(df_inner, q, alpha, epsilon, PERIODS,
                                          actions, pip_mul)

                # results
                rewards.append(r)
                errors.append(error * pip_mul)
                ticks.append(tick)

                # Share of epochs in the history with a positive reward.  The
                # loop variable is deliberately not called ``r`` so it cannot
                # clobber the epoch reward on Python 2.
                win_ratio = np.mean(
                    [1. if reward > 0. else 0. for reward in rewards])

                # adjust values
                epsilon = np.sqrt((1 - win_ratio) * 100.) / 100.
                alpha = epsilon / 2.

                if time() - time_start > time_interval or debug:

                    # Drop the oldest entries in one slice deletion; the three
                    # lists are always appended together, so they share a
                    # length.
                    overflow = len(rewards) - history_limit
                    if overflow > 0:
                        del rewards[:overflow]
                        del errors[:overflow]
                        del ticks[:overflow]

                    # RMSD
                    rmsd = np.sqrt(np.mean([err ** 2 for err in errors]))
                    logging.info('RMSD: {0}'.format(rmsd))

                    # win_ratio was computed before pruning, exactly like the
                    # value reported previously.
                    logging.warning('{0} [{1:05d}] RMSD {2:03d} PPT {3:03d} WR {5:.0f}% [ticks:{6:.1f} sum:{4:.1f}, a:{7:.2f}, e:{8:.2f}]'.format(
                        currency,
                        epoch,
                        int(rmsd),
                        int(np.mean(rewards) * pip_mul),
                        sum(rewards),
                        win_ratio * 100,
                        np.mean(ticks),
                        alpha * 100,
                        epsilon * 100,
                    ))

                    # exit
                    if (time() - time_start >= seconds_to_run) or debug:
                        break

                    time_interval += seconds_info_intervals

            saveQ(currency, interval, q)

            summarizeActions(q)

            if debug:
                return  # one currency is enough in debug mode

        if debug:
            break  # nothing was trained (no currencies); do not loop forever
Exemplo n.º 2
0
def main(debug):
    """Run the open-ended Q-table training loop over CURRENCIES.

    The interval is picked once from INTERVALS.  The outer loop raises the
    per-currency time budget by one minute on every pass.  For each
    ``(currency, min_trail)`` pair the stored Q table is loaded, then
    ``train`` is called repeatedly on random tails of the last 2000 rows
    until the budget is spent, and the Q table is saved.  After every epoch
    ``epsilon`` is set to ``sqrt((1 - win_ratio) * 100) / 100`` and
    ``alpha`` to half of that.

    Args:
        debug: When truthy, stop after the first epoch of the first
            currency (the summary is still logged and the Q table saved).
    """
    interval = choice(INTERVALS)

    minutes = 0
    while True:
        minutes += 1
        seconds_to_run = 60 * minutes
        seconds_info_intervals = seconds_to_run / 4
        # Maximum number of epochs kept in the rolling histories.
        max_history = 1000 + minutes * 1000
        logging.error('Training each currency for {0} minutes'.format(minutes))

        # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
        for currency, min_trail in CURRENCIES.items():
            pip_mul = 100. if 'JPY' in currency else 10000.
            actions = calculateActions(min_trail)

            df = loadData(currency, interval)
            df = df[-2000:]  # keep the last 2000 rows only
            df = getBackgroundKnowledge(df, PERIODS)

            alpha = 0
            epsilon = 0
            q = loadQ(currency, interval)

            time_start = time()
            # Next elapsed-time mark (seconds) at which a summary is logged.
            time_interval = 0

            epoch = 0
            rewards = []
            errors = []
            ticks = []
            logging.warning('Training {0} on {1} with {2} ticks...'.format(
                currency, interval, len(df)))
            while True:
                epoch += 1
                logging.info(' ')
                logging.info('{0}'.format('=' * 20))
                logging.info('EPOCH {0}'.format(epoch))

                index_start = randint(0, len(df) - 20)
                df_inner = df.iloc[index_start:]
                logging.info('Epoch: at {0} with {1} ticks'.format(
                    index_start, len(df_inner)))
                q, r, error, tick = train(df_inner, q, alpha, epsilon, PERIODS,
                                          actions, pip_mul)

                # results
                rewards.append(r)
                errors.append(error * pip_mul)
                ticks.append(tick)

                # Fraction of recorded epochs with a positive reward; a
                # distinct loop name avoids overwriting ``r`` on Python 2.
                win_ratio = np.mean(
                    [1. if past > 0. else 0. for past in rewards])

                # adjust values
                epsilon = np.sqrt((1 - win_ratio) * 100.) / 100.
                alpha = epsilon / 2.

                if time() - time_start > time_interval or debug:

                    # Trim all three histories to the newest max_history
                    # entries with one slice delete each (they always have
                    # equal length).
                    surplus = len(rewards) - max_history
                    if surplus > 0:
                        del rewards[:surplus]
                        del errors[:surplus]
                        del ticks[:surplus]

                    # RMSD
                    rmsd = np.sqrt(np.mean([err ** 2 for err in errors]))
                    logging.info('RMSD: {0}'.format(rmsd))

                    # The reported win ratio is the pre-trim value, matching
                    # what was logged before.
                    logging.warning(
                        '{0} [{1:05d}] RMSD {2:03d} PPT {3:03d} WR {5:.0f}% [ticks:{6:.1f} sum:{4:.1f}, a:{7:.2f}, e:{8:.2f}]'
                        .format(
                            currency,
                            epoch,
                            int(rmsd),
                            int(np.mean(rewards) * pip_mul),
                            sum(rewards),
                            win_ratio * 100,
                            np.mean(ticks),
                            alpha * 100,
                            epsilon * 100,
                        ))

                    # exit
                    if (time() - time_start >= seconds_to_run) or debug:
                        break

                    time_interval += seconds_info_intervals

            saveQ(currency, interval, q)

            summarizeActions(q)

            if debug:
                return  # debug mode handles a single currency

        if debug:
            break  # reached only when CURRENCIES yielded nothing
Exemplo n.º 3
0
def main(debug):
    """Train the Q table of every entry in DATA in ever-longer rounds.

    Round ``n`` shuffles DATA in place and gives each entry a time budget of
    ``n`` minutes.  For an entry, the currency, the first listed interval and
    the pip multiplier are read from the dict.  Data is loaded with the
    ``'train'`` argument, and ``train`` is called on random tails of it.
    After each epoch ``alpha`` and ``epsilon`` are recomputed as quadratic
    functions of the running win ratio.  Every fifth of the budget a summary
    is logged and the Q table is saved; the Q table is saved again when the
    entry is finished.

    Args:
        debug: When truthy, run a single epoch for a single entry, log the
            summary, save the Q table and return instead of looping forever.
    """
    minutes = 0
    while True:
        minutes += 1
        seconds_to_run = 60 * minutes
        seconds_info_intervals = seconds_to_run / 5
        # Cap on the length of the per-epoch histories; grows with the budget.
        history_limit = 1000 + minutes * 1000
        logging.error('Training each currency for {0} minutes'.format(minutes))

        shuffle(DATA)
        for info in DATA:
            logging.debug('Currency info: {0}'.format(info))
            currency = info['currency']
            interval = info['intervals'][0]
            pip_mul = info['pip_mul']

            df = loadData(currency, interval, 'train')
            df = getBackgroundKnowledge(df, PERIODS)

            alpha = 0.
            epsilon = alpha / 2.
            q = loadQ(currency, interval)

            time_start = time()
            # Elapsed-seconds threshold for the next summary.
            time_interval = seconds_info_intervals

            epoch = 0
            rewards = []
            errors = []
            ticks = []
            logging.warning('Training {0} on {1} with {2} ticks [m:{3}]'.format(
                currency,
                interval,
                len(df),
                minutes,
            ))

            while True:
                epoch += 1
                logging.info(' ')
                logging.info('{0}'.format('=' * 20))
                logging.info('EPOCH {0}'.format(epoch))

                # Each epoch trains on a random tail of the frame.
                index_start = randint(0, len(df) - 20)
                df_inner = df.iloc[index_start:]
                logging.info('Epoch: at {0} with {1} ticks'.format(
                    index_start, len(df_inner)))
                q, r, error, tick = train(df_inner, q, alpha, epsilon, PERIODS,
                                          ACTIONS, pip_mul, info['std'])

                # results
                error *= pip_mul
                rewards.append(r)
                errors.append(error)
                ticks.append(tick)

                # Share of epochs in the history with a positive reward.  The
                # loop variable is deliberately not called ``r`` so it cannot
                # clobber the epoch reward on Python 2.
                win_ratio = np.mean(
                    [1. if reward > 0. else 0. for reward in rewards])

                # Quadratic functions of the win ratio; the coefficients are
                # kept verbatim (NOTE(review): they look like fitted values,
                # origin not visible here).
                alpha = 1.0102052281586786e+000 + (-2.0307383627607809e+000 * win_ratio) + (1.0215546892913909e+000 * win_ratio**2)
                epsilon = 3.9851080604500078e-001 + (2.1874724815820201e-002 * win_ratio) + (-4.1444101741886652e-001 * win_ratio**2)

                # only do updates at interval
                if time() - time_start > time_interval or debug:

                    # Drop the oldest entries in one slice deletion; the three
                    # lists are always appended together, so they share a
                    # length.
                    overflow = len(rewards) - history_limit
                    if overflow > 0:
                        del rewards[:overflow]
                        del errors[:overflow]
                        del ticks[:overflow]

                    # RMSD
                    rmsd = np.sqrt(np.mean([err ** 2 for err in errors]))

                    # win_ratio was computed before pruning, exactly like the
                    # value reported previously.
                    logging.warning('{0} [{1:05d}] RMSD {2:03d} PPT {3:03d} WR {5:.0f}% [ticks:{6:.1f} sum:{4:.1f}, a:{7:.2f}, e:{8:.2f}]'.format(
                        currency,
                        epoch,
                        int(rmsd),
                        int(np.mean(rewards) * pip_mul),
                        sum(rewards),
                        win_ratio * 100,
                        np.mean(ticks),
                        alpha * 100,
                        epsilon * 100,
                    ))

                    # Leave once the threshold that just fired was the full
                    # budget.
                    if time_interval >= seconds_to_run or debug:
                        break

                    # continue
                    time_interval += seconds_info_intervals
                    saveQ(currency, interval, q)

            saveQ(currency, interval, q)

            summarizeActions(q)

            if debug:
                return  # one currency is enough in debug mode

        if debug:
            break  # nothing was trained (DATA empty); do not loop forever
Exemplo n.º 4
0
def main(debug):
    """Run the open-ended Q-table training loop over the entries of DATA.

    The outer loop raises the per-entry time budget by one minute on every
    pass and reshuffles DATA in place.  Each entry supplies ``currency``,
    ``intervals`` (only the first is used), ``pip_mul`` and ``std``.  The
    stored Q table is loaded, and ``train`` is called repeatedly on random
    tails of the data loaded with the ``'train'`` argument.  ``alpha`` and
    ``epsilon`` are re-derived from the running win ratio after every epoch.
    A summary is logged and the Q table saved at every fifth of the budget;
    a final save follows once the budget is spent.

    Args:
        debug: When truthy, stop after the first epoch of the first entry
            (the summary is still logged and the Q table saved).
    """
    minutes = 0
    while True:
        minutes += 1
        seconds_to_run = 60 * minutes
        seconds_info_intervals = seconds_to_run / 5
        # Maximum number of epochs kept in the rolling histories.
        max_history = 1000 + minutes * 1000
        logging.error('Training each currency for {0} minutes'.format(minutes))

        shuffle(DATA)
        for info in DATA:
            logging.debug('Currency info: {0}'.format(info))
            currency = info['currency']
            interval = info['intervals'][0]
            pip_mul = info['pip_mul']

            df = loadData(currency, interval, 'train')
            df = getBackgroundKnowledge(df, PERIODS)

            alpha = 0.
            epsilon = alpha / 2.
            q = loadQ(currency, interval)

            time_start = time()
            # Next elapsed-time mark (seconds) at which a summary is logged.
            time_interval = seconds_info_intervals

            epoch = 0
            rewards = []
            errors = []
            ticks = []
            logging.warning('Training {0} on {1} with {2} ticks [m:{3}]'.format(
                currency,
                interval,
                len(df),
                minutes,
            ))

            while True:
                epoch += 1
                logging.info(' ')
                logging.info('{0}'.format('=' * 20))
                logging.info('EPOCH {0}'.format(epoch))

                index_start = randint(0, len(df) - 20)
                df_inner = df.iloc[index_start:]
                logging.info('Epoch: at {0} with {1} ticks'.format(
                    index_start, len(df_inner)))
                q, r, error, tick = train(df_inner, q, alpha, epsilon, PERIODS,
                                          ACTIONS, pip_mul, info['std'])

                # results
                error *= pip_mul
                rewards.append(r)
                errors.append(error)
                ticks.append(tick)

                # Fraction of recorded epochs with a positive reward; a
                # distinct loop name avoids overwriting ``r`` on Python 2.
                win_ratio = np.mean(
                    [1. if past > 0. else 0. for past in rewards])

                # Both rates are second-degree polynomials in the win ratio.
                # Coefficients and evaluation order are kept verbatim
                # (NOTE(review): presumably fitted offline; confirm source).
                alpha = 1.0102052281586786e+000 + (
                    -2.0307383627607809e+000 *
                    win_ratio) + (1.0215546892913909e+000 * win_ratio**2)
                epsilon = 3.9851080604500078e-001 + (
                    2.1874724815820201e-002 *
                    win_ratio) + (-4.1444101741886652e-001 * win_ratio**2)

                # only do updates at interval
                if time() - time_start > time_interval or debug:

                    # Trim all three histories to the newest max_history
                    # entries with one slice delete each (they always have
                    # equal length).
                    surplus = len(rewards) - max_history
                    if surplus > 0:
                        del rewards[:surplus]
                        del errors[:surplus]
                        del ticks[:surplus]

                    # RMSD
                    rmsd = np.sqrt(np.mean([err ** 2 for err in errors]))

                    # The reported win ratio is the pre-trim value, matching
                    # what was logged before.
                    logging.warning(
                        '{0} [{1:05d}] RMSD {2:03d} PPT {3:03d} WR {5:.0f}% [ticks:{6:.1f} sum:{4:.1f}, a:{7:.2f}, e:{8:.2f}]'
                        .format(
                            currency,
                            epoch,
                            int(rmsd),
                            int(np.mean(rewards) * pip_mul),
                            sum(rewards),
                            win_ratio * 100,
                            np.mean(ticks),
                            alpha * 100,
                            epsilon * 100,
                        ))

                    # Stop when the mark that just triggered was the whole
                    # budget.
                    if time_interval >= seconds_to_run or debug:
                        break

                    # continue
                    time_interval += seconds_info_intervals
                    saveQ(currency, interval, q)

            saveQ(currency, interval, q)

            summarizeActions(q)

            if debug:
                return  # debug mode handles a single entry

        if debug:
            break  # reached only when DATA is empty