Пример #1
0
 def test_encode_movement(self):
     """
     Check the movement of every candlestick relative to the one before it.

     Each tick after the first is wrapped in a CSEncoder and its movement
     is encoded against the previous CSEncoder. The resulting numeric
     deltas and their encoded tags are compared with the reference tables
     returned by encoded_deltas() and encoded_tags().
     -------------------------------------------
     0   10  20  30  40  50  60  70  80  90  100
     A   B   C   D   E   F   G   H   I   J   K
     -------------------------------------------
     """
     expected_tags = encoded_tags()
     expected_deltas = encoded_deltas()
     fields = ('delta_open', 'delta_high', 'delta_low', 'delta_close')

     previous = CSEncoder(self.params, self.data.iloc[0])
     for row in range(1, self.data.shape[0]):
         current = CSEncoder(self.params, self.data.iloc[row])
         current.encode_movement(previous)
         # Numeric deltas are compared first, then their encoded tags.
         for field in fields:
             self.assertAlmostEqual(
                 getattr(current, field),
                 getattr(expected_deltas.iloc[row], field))
         for field in fields:
             self.assertEqual(
                 getattr(current, 'encoded_' + field),
                 getattr(expected_tags.iloc[row], field))
         # The candlestick just checked becomes the reference for the next.
         previous = current
Пример #2
0
    def test_encode_tick(self):
        """
        Check _encode_tick() on every row of the sample data.

        Each row is encoded together with the result of encoding the row
        before it; the first row is encoded with None as its predecessor.
        The returned object must be a CSEncoder whose encoded tags and
        numeric deltas match the reference tables.
        -------------------------------------------
        0   10  20  30  40  50  60  70  80  90  100
        A   B   C   D   E   F   G   H   I   J   K
        -------------------------------------------
        """
        fitted = CSEncoder(self.params).fit(self.data)
        expected_tags = encoded_tags()
        expected_deltas = encoded_deltas()
        fields = ('delta_open', 'delta_high', 'delta_low', 'delta_close')

        # The first row has no predecessor, so None is passed for it.
        previous = None
        for label, row in self.data.iterrows():
            encoded = fitted._encode_tick(row, previous)
            self.assertIsInstance(encoded, CSEncoder)
            # Encoded tags are compared first, then the numeric deltas.
            for field in fields:
                self.assertEqual(
                    getattr(encoded, 'encoded_' + field),
                    expected_tags.at[label, field])
            for field in fields:
                self.assertAlmostEqual(
                    getattr(encoded, field),
                    expected_deltas.at[label, field])
            # The encoded candlestick (not the raw row) is handed to the
            # next iteration as the previous tick.
            previous = encoded
Пример #3
0
 def test_encode_body(self):
     """
     Check that encode_body() of every sample tick matches the 'body'
     column of the reference tags.
     """
     reference = encoded_tags()
     n_ticks = self.data.shape[0]
     for pos in range(n_ticks):
         candle = CSEncoder(self.params, self.data.iloc[pos])
         body_tag = candle.encode_body()
         self.assertEqual(body_tag, reference.iloc[pos]['body'])
Пример #4
0
 def test_inverse_transform(self):
     """
     Check that an encoded series can be decoded back to its open values.

     Only the 'o' column of the reconstructed frame is compared against
     the original data.
     """
     codec = CSEncoder(self.params)
     encoded = codec.fit_transform(self.data)
     # inverse_transform takes the encoded series as a dataframe plus the
     # first encoded candlestick.
     encoded_df = cs_to_df(encoded, self.params.cse_tags)
     restored = codec.inverse_transform(encoded_df, encoded[0])
     for pos in range(restored.shape[0]):
         rebuilt_open = restored.iloc[pos]['o']
         self.assertEqual(rebuilt_open, self.data.iloc[pos]['o'])
Пример #5
0
 def test_correct_encoding(self):
     """
     Check which encoding strings _correct_encoding() accepts.

     'ohlc' and 'OHLC' must be accepted; '0hlc', 'ohl' and the empty
     string must be rejected.
     """
     for accepted in ('ohlc', 'OHLC'):
         candidate = CSEncoder(self.params, encoding=accepted)
         self.assertTrue(candidate._correct_encoding())
     for rejected in ('0hlc', 'ohl', ''):
         candidate = CSEncoder(self.params, encoding=rejected)
         self.assertFalse(candidate._correct_encoding())
Пример #6
0
 def load_encoders(self, model_names):
     """
     Build one loaded encoder per entry in model_names.

     :param model_names: mapping whose values provide an 'encoder' item;
         that item is handed to CSEncoder.load().
     :return: dict keyed like model_names, holding whatever load() returns.
     """
     return {
         name: CSEncoder(self.params).load(model_names[name]['encoder'])
         for name in model_names
     }
Пример #7
0
    def train(self, data: DataFrame):
        """
        Fit an encoder on the given ticks and train the networks with them.

        :param data: Data in OHLC format from the ticks module. It is not
            modified; a deep copy is taken before the date column is
            dropped.
        :return: tuple with the result of train_nn() and the fitted encoder.
        """
        # Work on a copy without the column named by csv_dict['d'].
        ohlc = data.copy(deep=True)
        ohlc = ohlc.drop([self.params.csv_dict['d']], axis=1)

        fitted = CSEncoder(self.params).fit(ohlc)
        encoded = fitted.ticks2cse(ohlc)
        training_set = self.prepare_input(fitted, encoded,
                                          self.params.subtypes)
        network = self.train_nn(training_set, self.params.subtypes)
        # The encoder is saved only after training has completed.
        fitted.save()

        return network, fitted
Пример #8
0
def predict_next(ticks: DataFrame, encoder: CSEncoder, nn: Dict[str, CS_NN],
                 params: CSDictionary) -> float:
    """
    From a list of ticks, make a prediction of what will be the next CS.

    :param ticks: a dataframe of ticks with the expected headers. When its
        number of rows differs from the encoder's window size, only the
        last ``window_size`` rows are used.
    :param encoder: the encoder used to train the network
    :param nn: the networks to make the prediction with, keyed by subtype
        (the 'body' entry is read here for logging)
    :param params: the parameters file read from configuration.

    :return: the close value of the CS predicted.
    """
    # Check that the input group of ticks match the size of the window of
    # the network that is going to make the predict. That parameter is in
    # the window_size attribute within the 'encoder'.
    window_size = encoder.params.window_size
    if ticks.shape[0] != window_size:
        info_msg = 'Tickgroup resizing: {} -> {}'
        params.log.info(info_msg.format(ticks.shape[0], window_size))
        # window_size is a plain value (it is compared and formatted above),
        # so it must not be called. reset_index() returns a new frame, so
        # its result has to be kept for the renumbering to take effect.
        ticks = ticks.iloc[-window_size:, :].reset_index(drop=True)

    # encode the tick in CSE and OH. Reshape it to the expected LSTM format.
    cs_tick = encoder.transform(ticks[params.ohlc_tags])
    pred_body_cs = predict_body(cs_tick, encoder, nn)
    pred_move_cs = predict_move(cs_tick, encoder, nn)
    prediction_cs = pred_body_cs + pred_move_cs

    # Build a single row dataframe with the entire prediction
    prediction_df = pd.DataFrame(columns=params.cse_tags,
                                 data=np.array(prediction_cs).reshape(
                                     -1, len(params.cse_tags)))

    cs_values = '|'.join('{}'.format(tag)
                         for tag in prediction_df[params.cse_tags].values[0])
    params.log.info(f"N2t {nn['body'].name} -> {cs_values}")

    # Convert the prediction to a real tick
    # TODO: Keep the date in predictions, so we can see for what date the
    #       prediction is being produced
    # TODO: Instead of returning only the CLOSE value, return the entire
    #       candlestick.
    pred = encoder.inverse_transform(prediction_df, cs_tick[-1])
    return pred['c'].values[-1]
Пример #9
0
def predict_body(cs_tick: List[CSEncoder], encoder: CSEncoder,
                 nn: Dict[str, CS_NN]) -> List[str]:
    """
    Predict the body part of the next candlestick.

    :param cs_tick: the encoded candlesticks used as input window.
    :param encoder: encoder providing body() and the 'body' onehot encoder.
    :param nn: networks keyed by subtype; the 'body' entry is used.

    :return: the decoded body prediction, converted with tolist().
    """
    onehot_body = encoder.onehot['body']
    window_oh = onehot_body.encode(encoder.body(cs_tick))
    # Add a leading axis of length one in front of the encoded window.
    batch = window_oh.values[np.newaxis, :, :]

    body_net = nn['body']
    scores = body_net.predict(batch)[0]
    winner_oh = body_net.hardmax(scores)
    return onehot_body.decode(winner_oh).tolist()
Пример #10
0
 def test_transform(self):
     """
     Check fit_transform() on the sample data: the result must hold six
     items and every one of them must be a CSEncoder.
     """
     encoded = CSEncoder(self.params).fit_transform(self.data)
     self.assertEqual(len(encoded), 6)
     for candle in encoded:
         self.assertIsInstance(candle, CSEncoder)
Пример #11
0
 def test_encode_with(self):
     """
     Check _encode_with() on the first and third sample ticks, including
     the AssertionError raised for the argument '123'.
     """
     # First tick: '123' must be refused, 'ABCDE' must yield 'A'.
     first = CSEncoder(self.params, self.data.iloc[0])
     self.assertRaises(AssertionError, first._encode_with, '123')
     self.assertEqual(first._encode_with('ABCDE'), 'A')
     # Third tick: 'KLMNO' must yield 'M'.
     third = CSEncoder(self.params, self.data.iloc[2])
     self.assertEqual(third._encode_with('KLMNO'), 'M')
Пример #12
0
    def test_decode_cse(self):
        """
        Check that every encoded tick decodes back close to its original.

        Each tick is decoded against the encoded tick before it (the first
        one is decoded against itself) and the four decoded values must
        fall within a tolerance of the original 'o', 'h', 'l', 'c' values.
        """
        # Column names handed to the decoder: every csv_dict key except 'd'.
        col_names = [key for key in self.params.csv_dict.keys()
                     if key != 'd']
        encoder = CSEncoder(self.params).fit(self.data)
        encoded = encoder.transform(self.data)

        for pos in range(self.data.shape[0]):
            encoded_df = encoded_cs_to_df(encoded[pos], self.params.cse_tags)
            reference = encoded[max(pos - 1, 0)]
            # Tolerance is 10% of the reference tick's high-low width.
            tolerance = reference.hl_interval_width * 0.1
            decoded = encoder._decode_cse(encoded_df, reference, col_names)
            for slot, column in enumerate(('o', 'h', 'l', 'c')):
                error = abs(decoded[slot] - self.data.iloc[pos][column])
                self.assertLessEqual(error, tolerance)
Пример #13
0
 def test_fit(self):
     """
     Fit an encoder on the sample data and check what fit() leaves behind:
     the four cse_zero_* values, the fitted flag, and one onehot entry
     per subtype (two in total).
     """
     fitted = CSEncoder(self.params).fit(self.data)
     expected_zero = (
         ('cse_zero_open', 50.),
         ('cse_zero_high', 100.),
         ('cse_zero_low', 0.),
         ('cse_zero_close', 50.5),
     )
     for attr, expected in expected_zero:
         self.assertEqual(getattr(fitted, attr), expected)
     self.assertTrue(fitted.fitted)
     # There must be exactly two onehot entries, none of them None.
     self.assertEqual(len(fitted.onehot), 2)
     for subtype in self.params.subtypes:
         self.assertIsNotNone(fitted.onehot[subtype])
Пример #14
0
 def test_recursive_encode_movement(self):
     """
     Check the letter returned by _encode_movement() for a series of
     values, using a fitted encoder with its default dictionaries.
     """
     encoder = CSEncoder(self.params).fit(self.data)
     expected_letters = (
         (0.0, 'A'),
         (0.1, 'B'),
         (0.2, 'C'),
         (0.3, 'D'),
         (0.4, 'E'),
         (0.5, 'F'),
         (0.6, 'G'),
         (0.7, 'H'),
         (0.8, 'I'),
         (0.9, 'J'),
         (1.1, 'K'),
     )
     for value, letter in expected_letters:
         self.assertEqual(encoder._encode_movement(value=value), letter)
Пример #15
0
def predict_move(cs_tick: List[CSEncoder], encoder: CSEncoder,
                 nn: Dict[str, CS_NN]) -> List[str]:
    """
    Predict the movement part of the next candlestick.

    The 'move' network output is cut into consecutive groups whose length
    is the number of states of the 'move' onehot encoder. Each group is
    passed through hardmax() and then decoded on its own.

    :param cs_tick: the encoded candlesticks used as input window.
    :param encoder: encoder providing move() and the 'move' onehot encoder.
    :param nn: networks keyed by subtype; the 'move' entry is used.

    :return: list with the first decoded element of every group.
    """
    move_encoder = encoder.onehot['move']
    window_oh = move_encoder.encode(encoder.move(cs_tick))
    # Add a leading axis of length one in front of the encoded window.
    batch = window_oh.values[np.newaxis, :, :]

    # Number of groups = size of the last input axis / states per group.
    group_len = len(encoder.onehot['move'].states)
    group_count = int(batch.shape[2] / group_len)

    move_net = nn['move']
    flat_scores = move_net.predict(batch)[0]
    offsets = [group * group_len for group in range(group_count)]
    # All groups are hardmaxed first; decoding happens in a second pass.
    hard_groups = [
        move_net.hardmax(flat_scores[start:start + group_len])
        for start in offsets
    ]
    return [move_encoder.decode(group)[0] for group in hard_groups]
Пример #16
0
    def test_CSEncoder(self):
        """
        Check the constructor when no tick is passed: all price attributes
        must be 0 and every encoded delta must be 'pA'.
        """
        blank = CSEncoder(self.params)
        for attr in ('open', 'close', 'high', 'low', 'min', 'max'):
            self.assertEqual(getattr(blank, attr), 0.)

        # Initialization
        for suffix in ('close', 'high', 'low', 'max', 'min', 'open'):
            self.assertEqual(getattr(blank, 'encoded_delta_' + suffix), 'pA')
Пример #17
0
 def test_add_ohencoder(self):
     """
     Check that, after fit(), the onehot entry of every subtype is an
     OHEncoder.
     """
     fitted = CSEncoder(self.params).fit(self.data)
     for subtype in self.params.subtypes:
         onehot_entry = fitted.onehot[subtype]
         self.assertIsInstance(onehot_entry, OHEncoder)
Пример #18
0
    def test_calc_parameters(self):
        """
        Check the parameters computed for the first two sample ticks.

        For each tick a CSEncoder is built from the row, and its sign
        flags, interval widths, percentiles, shadow measurements and
        body-position flags are compared against hand-written values.
        """
        def check_equal(candle, expectations):
            # Exact comparison of each named attribute, in the order given.
            for attr, expected in expectations:
                self.assertEqual(getattr(candle, attr), expected)

        def check_flags(candle, expectations):
            # Truthiness check of each named attribute, in the order given.
            for attr, expected in expectations:
                verify = self.assertTrue if expected else self.assertFalse
                verify(getattr(candle, attr))

        # Check with the first tick. (50, 100, 0, 50.5)
        first = CSEncoder(self.params, self.data.iloc[0])
        check_flags(first, [('positive', True), ('negative', False)])

        # Percentiles, etc...
        self.assertLessEqual(first.body_relative_size, 0.05)
        check_equal(first, [
            ('hl_interval_width', 100.),
            ('oc_interval_width', 0.5),
            ('mid_body_point', 50.25),
            ('mid_body_percentile', 0.5025),
            ('min_percentile', 0.5),
            ('max_percentile', 0.505),
            ('upper_shadow_len', 49.5),
            ('upper_shadow_percentile', 0.495),
            ('lower_shadow_len', 50.),
            ('lower_shadow_percentile', 0.5),
        ])
        self.assertAlmostEqual(first.shadows_relative_diff, 0.005)
        self.assertEqual(first.body_relative_size, 0.005)
        self.assertAlmostEqual(first.shadows_relative_diff, 0.005)

        # Body position.
        check_flags(first, [
            ('shadows_symmetric', True),
            ('body_in_center', True),
            ('body_in_lower_half', False),
            ('body_in_upper_half', False),
            ('has_both_shadows', True),
            ('has_lower_shadow', True),
            ('has_upper_shadow', True),
        ])

        # Check with the second tick. (80, 100, 0, 70)
        second = CSEncoder(self.params, self.data.iloc[1])
        self.assertIsNot(second.positive, second.negative)
        check_flags(second, [('positive', False), ('negative', True)])

        # Percentiles, etc...
        self.assertLessEqual(second.body_relative_size, 0.1,
                             'Body relative size')
        check_equal(second, [
            ('hl_interval_width', 100.),
            ('oc_interval_width', 10.),
            ('mid_body_point', 75.),
            ('mid_body_percentile', 0.75),
            ('min_percentile', 0.7),
            ('max_percentile', 0.8),
            ('upper_shadow_len', 20.),
            ('upper_shadow_percentile', 0.2),
            ('lower_shadow_len', 70.),
            ('lower_shadow_percentile', 0.7),
            ('body_relative_size', 0.1),
        ])
        self.assertAlmostEqual(second.shadows_relative_diff, 0.5)

        # Body position.
        check_flags(second, [
            ('body_in_center', False),
            ('body_in_lower_half', False),
            ('body_in_upper_half', True),
            ('has_both_shadows', True),
            ('has_lower_shadow', True),
            ('has_upper_shadow', True),
            ('shadows_symmetric', False),
        ])
Пример #19
0
from cs_encoder import CSEncoder
from cs_logger import CSLogger
from params import Params
from ticks import Ticks

# Restrict TensorFlow's Python-side logging to errors. Both the function and
# the level constant are taken from tf.compat.v1.logging, because the
# top-level tf.logging alias is not available in TensorFlow 2.x.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is presumably meant to silence the
# native TensorFlow log output; confirm it still takes effect when set
# after tensorflow has already been imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Fixed seed for NumPy's global random generator.
np.random.seed(1)
# Configuration is built from the command line; the logger takes its level
# from it, and the OHLC data is read through a Ticks object.
params = Params(args=sys.argv)
log = CSLogger(params._log_level)
ticks = Ticks()
ohlc_data = ticks.read_ohlc()

if params.do_train is True:
    encoder = CSEncoder().fit(ohlc_data)
    cse = encoder.ticks2cse(ohlc_data)
    dataset = split_datasets(encoder, cse, params.subtypes)
    nn = train_nn(dataset, params.subtypes)
    encoder.save()
else:
    nn = load_nn(params.model_names, params.subtypes)
    encoder = load_encoders(params.model_names)
    predictions = pd.DataFrame([])

    if params._predict_training:
        # for from_idx in range(0, ticks.shape[0] - params._window_size + 1):
        for from_idx in range(0, 50 - params._window_size + 1):
            tick_group = ohlc_data.iloc[from_idx:from_idx +
                                        params._window_size]
            prediction = single_prediction(tick_group, nn, encoder, params)