def prepare_ts_file(self,
                    start_index,
                    end_index,
                    case_observation_size,
                    labeling_index,
                    label_window):
    file_full_path = self.prepare_ts_file_name(start_index, end_index,
                                               case_observation_size,
                                               labeling_index, label_window)
    if Path(file_full_path).is_file():
        logger("MODEL-DATA-PREP").debug("Using existing file: " + file_full_path)
        return file_full_path
    # "with" keeps the file closed on every exit path and avoids the NameError the old
    # try/finally raised when open() itself failed before fit_data_file was bound.
    with open(file_full_path, 'w') as fit_data_file:
        # sktime .ts header: multivariate series, no timestamps, boolean class labels
        fit_data_file.write(
            "@problemName fit_data\n@timeStamps false\n@univariate false\n@classLabel true True False\n@data\n")
        no_cases = 0
        case_last_data_point_index = end_index
        # Walk backwards from end_index, emitting one fixed-size case per end point
        while case_last_data_point_index > start_index + case_observation_size:
            case_data_points = self.data_points[
                case_last_data_point_index - case_observation_size:case_last_data_point_index]
            case_filter_flags = self.data_points_filter_results[
                case_last_data_point_index - case_observation_size:case_last_data_point_index]
            case_label = self.correct_decision_labels[case_last_data_point_index - 1][labeling_index]
            case_str = sktime_case_string_of(case_data_points, case_filter_flags, case_label,
                                             self.every_m_observations_for_dimension)
            fit_data_file.write(case_str + "\n")
            no_cases += 1
            case_last_data_point_index -= 1
        logger("MODEL-DATA-PREP").debug("Number of cases written: " + str(no_cases) + " -> " + file_full_path)
    return file_full_path
Example No. 2
    def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=4):
        x = self.data_points_filtered
        if until is not None:
            until_filtered = find_index_of_last_timestamp_before(x, self.data_points[until][0])
            if until_filtered < 0:
                self.prediction_failed_in_fit = True
                logger("MODEL-FIT").warn("Prediction failed in fit phase")
                return
            x = self.data_points_filtered[:until_filtered]
        self.pred_stride = int(len(assessment_windows) * self.pred_stride)
        self.horizon = max_horizon
        logger("MODEL-FIT").debug(
            "num_lags: {} / pred_stride: {} / fit_intercept: {} / horizon: {}".format(self.num_lags,
                                                                                      self.pred_stride,
                                                                                      self.fit_intercept,
                                                                                      self.horizon))
        occurrence_times = [data_point[0] for data_point in x]
        y = np.array(occurrence_times)
        X = y.reshape(-1, 1).copy()

        self.pipeline = self.get_pipeline()
        if self.learning_method == "deep":
            self.pipeline.fit(X[:-1].astype(np.float32), y[:-1].astype(np.float32))
        else:
            self.pipeline.fit(X[:-1], y[:-1])
def sktime_case_string_of(observations, observation_identified_flags, label,
                          every_m_observations_for_dimension=None):
    if len(observations) == 0:
        return None

    if len(observations) != len(observation_identified_flags):
        return None

    no_dimensions = len(observations[0])
    if every_m_observations_for_dimension is None:
        every_m_observations_for_dimension = [1 for i in range(0, no_dimensions)]
    elif len(every_m_observations_for_dimension) != no_dimensions:
        logger("MODEL-DATA-PREP").warn("Wrong number of dimensions in every_m_observations_for_dimension parameter.")
        every_m_observations_for_dimension = [1 for i in range(0, no_dimensions)]

    dimension_strings = []
    for d in range(0, no_dimensions):
        every_nth = every_m_observations_for_dimension[d]
        observation_strings = []
        for o_idx, o in enumerate(observations):
            if o_idx % every_nth != 0:
                observation_strings.append('?')
            else:
                if observation_identified_flags[o_idx]:
                    observation_strings.append(str(o[d]))
                else:
                    observation_strings.append('?')
        dimension_strings.append(','.join(observation_strings))
    x_part = ':'.join(dimension_strings)
    return x_part + ':' + str(label)
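
A quick illustration (made-up values) of the case string this helper emits, which is what prepare_ts_file writes after the @data header: each dimension becomes a comma-separated series, dimensions are joined with ':', skipped or unidentified observations become '?', and the class label comes last.

    # Hypothetical inputs: 3 observations of 2 dimensions, middle one not identified
    observations = [[1.0, 4.0], [2.0, 5.0], [3.0, 6.0]]
    flags = [True, False, True]
    sktime_case_string_of(observations, flags, True)
    # -> '1.0,?,3.0:4.0,?,6.0:True'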
Example No. 4
 def compute_coocurance(self, documents):
     """ Computes the sparse co-occurance matrix storing only the rows and and values """
     rprint('counting unique tokens')
     V = set()
     for document in documents:
         tokens = self.preprocessor(document, to_tokens = True)
         V     |= set(tokens)
     logger(f'counted {len(V):,d} unique tokens')
     
     # Vocabulary dictionary - map each token to an integer for indexing
     self.V = {k : v for v, k in enumerate(V)}
     
     sparse = {}
     N      = len(documents)
     u      = Update('computing co-occurance matrix : document', N)
     for n, document in enumerate(documents, 1):
         u.increment()
         for tokens in self.preprocessor(document):
             ntokens = len(tokens)
             for t, token1 in enumerate(tokens):
                 # Center token
                 i      = self.V[token1]
                 
                 # Window (forwards only)
                 window = range(t + 1, min(ntokens, t + self.window_size))
                 
                 for w in window:
                     increment = 1 / (w - t)
                     
                     # Token ahead
                     token2    = tokens[w]
                     j         = self.V[token2]
                     
                     # Increment forwards and backwards
                     if (i, j) in sparse:
                         sparse[(i, j)] += increment
                         sparse[(j, i)] += increment
                     else:
                         sparse[(i, j)]  = increment
                         sparse[(j, i)]  = increment
                         
         # Verbose updates every 1000 documents   
         if n % 1000 == 0:
             u.display()
             
     # Final update if not already given
     if n % 1000 != 0:
         u.increment()
         u.display()
     
     rprint('converting to sparse indices and values')
     # Store rows and values
     self.r, self.c = np.array(list(sparse)).T.astype('int32')
     # Co-occurrence values are fractional (accumulated 1/distance increments), so keep them as floats
     self.x         = np.array(list(sparse.values())).astype('float32')
         
     logger(f'computed co-occurance matrix with {len(self.V) ** 2:,d} elements and {len(self.x):,d} interactions')
     
     self.compute_min_idx()
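
A small worked example of the 1/(w - t) distance weighting above (illustrative tokens only), with window_size = 3 so each centre token looks at most two tokens ahead:

    # tokens = ["a", "b", "c"], window_size = 3
    # t=0 ("a"): sparse[(a,b)] and sparse[(b,a)] += 1/1; sparse[(a,c)] and sparse[(c,a)] += 1/2
    # t=1 ("b"): sparse[(b,c)] and sparse[(c,b)] += 1/1
    # t=2 ("c"): forward window is empty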
Example No. 5
 def load(self, path):
     """ Loads vocabulary and sparse co-occurance matrix """
     npz        = np.load(path, allow_pickle = True)
     self.V     = npz['V'].tolist() # Vocabulary
     self.r     = npz['r']          # Rows of non-zero co-occurances
     self.c     = npz['c']          # Cols of non-zero co-occurances
     self.x     = npz['x']          # Vals of non-zero co-occurances
     logger(f'set co-occurance matrix with {len(self.V) ** 2:,d} elements, {len(self.x):,d} interactions, and {len(self.V):,d} unique tokens')
     
     self.compute_min_idx()
Example No. 6
 def dump_vectors(self, path, **kwargs):
     """ Dumps the word and context weight matrices and bias vectors """
     if self.x_min is not None:
         rprint('computing valid mask')
         sp    = sparse.csr_matrix((self.x, (self.r, self.c)))
         valid = np.where(sp.max(axis = 1).A.flatten() > self.x_min)[0]
         np.savez(path, W = self.W, Wc = self.Wc, b = self.b, bc = self.bc, L = self.L, valid = valid)
     else:
         np.savez(path, W = self.W, Wc = self.Wc, b = self.b, bc = self.bc, L = self.L, **kwargs)
     logger(f'dumped vectors at "{path}"')
 def does_pool_match(self, pool, new_random):
     start = 0
     end = -1
     for p in self.pools:
         if pool.id == p.id:
             end = start + int(p.share * self.random_granularity)
             break
         start += int(p.share * self.random_granularity)
     if end == -1:
         logger("random-data-generator").warn(
             "Generated random value did not fit in any pools!!!!")
     return start < new_random <= end
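
A sketch of the share-based partition this check implements, with two hypothetical pools of share 0.6 and 0.4 and random_granularity = 100: the first pool owns the half-open interval (0, 60] and the second (60, 100].

    # does_pool_match(pool_b, 73) -> start=60, end=100 -> 60 < 73 <= 100 -> True
    # does_pool_match(pool_a, 73) -> start=0,  end=60  -> 0 < 73 <= 60   -> False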
Example No. 8
def extend_mine_data_by_prediction(how_many):
    """
    Predicts new block information and appends it to the mine database table
    :param how_many: how many new rows to predict
    :return: None
    """
    logger("prediction").info(
        "Predicting {0} new records and "
        "adding them to the mine database main table".format(how_many))
    # Create database elements if they do not exist
    pass
Example No. 9
def update():
    """
    Updates the mine data by fetching new records from the pool web API
    :return: None
    """
    logger("data_fetcher").info("Updating the data")

    # get last block data from database
    last_block_no = get_last_block_no_seen()

    # use slushpool api to update database
    result = update_with_api(last_block_no)
    print(result)
Example No. 10
 def decide(self, current_x, luck_average_windows, assessment_window, horizon_predictions, assessment_windows):
     logger("STEP-PREDICTOR").debug(
         "current_x: {} / avg_windows: {} / assmnt_window: {} / predictions: {} / assmnt_windows: {}".format(
             current_x,
             luck_average_windows,
             assessment_window,
             horizon_predictions,
             assessment_windows
         ))
     if horizon_predictions is None:
         return None
     window_length = TIME_10_MINUTES * assessment_window
     occurrences_count_in_window = 0
     for p in horizon_predictions:
         if 0 < p - current_x[0] <= window_length:
             occurrences_count_in_window += 1
     return occurrences_count_in_window >= self.positive_decision_occurrence_count_threshold
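
A numeric illustration of the counting rule above, assuming TIME_10_MINUTES is 600 seconds and positive_decision_occurrence_count_threshold is 2 (both assumptions for this sketch):

    # current_x[0] = 0, assessment_window = 3  -> window_length = 1800
    # horizon_predictions = [600, 1500, 2400]  -> 600 and 1500 fall inside (0, 1800]
    # occurrences_count_in_window = 2 >= 2     -> decide(...) returns True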
Example No. 11
 def decide(self, current_x, luck_average_windows, assessment_window, horizon_predictions, assessment_windows):
     logger("SKTIME-DECIDE").debug(
         "current_x: {} / avg_windows: {} / assmnt_window: {} / predictions: {} / assmnt_windows: {}".format(
             current_x,
             luck_average_windows,
             assessment_window,
             horizon_predictions,
             assessment_windows
         ))
     if horizon_predictions is None:
         return None
     for wi, w in enumerate(assessment_windows):
         if w > self.horizon:
             break
         if w == assessment_window:
             return horizon_predictions[wi] == 'true'
     return None
Example No. 12
 def predict(self, luck_average_windows, assessment_windows, from_idx=None):
     logger("MODEL-FIT").debug(
         "num_lags: {} / pred_stride: {} / fit_intercept: {} / horizon: {}".format(self.num_lags,
                                                                                   self.pred_stride,
                                                                                   self.fit_intercept,
                                                                                   self.horizon))
     x = self.data_points
     strengths = self.aggregator.aggregate_lucks(x, luck_average_windows)
     strengths_series = [s[1] for s in strengths]
     y = np.array(strengths_series)
     X = y.reshape(-1, 1).copy()
     result = []
     if self.learning_method == "deep":
         if from_idx is None:
             prediction = self.pipeline.predict(X.astype(np.float32), start_idx=len(X) - 1, to_scale=True)
             predictions = []
             for h in assessment_windows:
                 if h <= self.horizon:
                     predictions.append(prediction[h - 1])
             result.append(predictions)
         else:
             prediction = self.pipeline.predict(X.astype(np.float32), start_idx=from_idx, to_scale=True)
             for p in prediction:
                 predictions = []
                 for h in assessment_windows:
                     if h <= self.horizon:
                         predictions.append(p[h - 1])
                 result.append(predictions)
     else:
         if from_idx is None:
             prediction = self.pipeline.predict(X, start_idx=len(X) - 1)
             predictions = []
             for h in assessment_windows:
                 if h <= self.horizon:
                     predictions.append(prediction[h - 1])
             result.append(predictions)
         else:
             prediction = self.pipeline.predict(X, start_idx=from_idx)
             for p in prediction:
                 predictions = []
                 for h in assessment_windows:
                     if h <= self.horizon:
                         predictions.append(p[h - 1])
                 result.append(predictions)
     return result
Example No. 13
def update_with_api(last_block_no):
    """
    Gets blocks data from slushpool API, if last block is included in 15 blcok data, update database accordingly, 
    otherwise the last block value to be updated using web scrapping
    :return: last block value to be scrapped, if none required returns 0
    """
    # temporary
    last_block_no = 641547
    # using slushpool api, get data of last 15 blocks
    url = "https://slushpool.com/stats/json/btc/"
    token = get_slush_account_token()
    headerVar = {"X-SlushPool-Auth-Token": token}
    result = requests.get(url, headers=headerVar)
    data = result.json()

    # parse json data, check if last_block_no is included in data retrieved
    data = data["btc"]
    blocks = data["blocks"]
    isIncluded = False
    blockNoList = []
    for key in blocks.keys():
        if last_block_no == int(key):
            isIncluded = True
        blockNoList.append(int(key))

    # take action based on the fact if last block No. exist in api response or not
    if (not isIncluded):
        return (min(blockNoList))
    else:
        # add data retrieved from API to the database
        logger("data_fetcher").info("Updating the data from pool web API")
        mine_database.switch_to_temporary_copy()
        for key in blocks.keys():
            if int(key) > last_block_no:
                blockData = blocks[key]
                dbRecord = dict()
                dbRecord["date_found"] = blockData["date_found"]
                dbRecord["duration"] = blockData["mining_duration"]
                dbRecord["hash_rate"] = blockData["pool_scoring_hash_rate"]
                dbRecord["difficulty"] = 111111111111111
                # FIXME get block difficulty
                dbRecord["block_no"] = int(key)
                dbRecord["block_value"] = blockData["value"]
                print(dbRecord)
Example No. 14
    def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=9 * 6):
        logger("MODEL-FIT").debug(
            "max_horizon: {} / avg windows: {} / assmnt windows: {} / until: {} / total_data_size: {}".format(
                max_horizon,
                str(luck_average_windows),
                str(assessment_windows),
                until,
                len(self.data_points)))
        if until is not None and (until < 0 or until >= len(self.data_points)):
            logger("MODEL-FIT").error("Parameter until is too large for the given data points: {}".format(until))
            return
        self.horizon = max_horizon
        for wi, w in enumerate(assessment_windows):
            if w > self.horizon:
                break
            # prepare data frame for sktime package

            temporary_data_fit_file = self.prepare_ts_file(0, len(self.data_points) if until is None else until,
                                                           self.case_observation_size, wi, w)

            # parse data frames from the temporary fit data file
            X, y = load_from_tsfile_to_dataframe(temporary_data_fit_file, replace_missing_vals_with="-100")
            # which label is the first one?
            true_index = 0
            if y[0] == "false":
                true_index = 1
            new_class_weights = self.create_class_weight_dict(true_index=true_index)
            estimators = []
            for i in range(0, len(luck_average_windows)):
                estimators.append(("TSF{}".format(i), TimeSeriesForestClassifier(
                    n_estimators=int(self.no_estimators),
                    n_jobs=16,
                    max_depth=self.max_depth,
                    class_weight=new_class_weights,
                    criterion=self.criterion,
                    min_samples_split=self.min_samples_split,
                    min_samples_leaf=self.min_samples_leaf,
                    oob_score=self.oob_score,
                    bootstrap=self.bootstrap),
                                   [i]))
            c = ColumnEnsembleClassifier(estimators=estimators)
            c.fit(X, y)
            # print(str(c.classes_))
            self.classifiers.append(c)
Example No. 15
    def predict(self, luck_average_windows, assessment_windows, from_idx=None):
        logger("MODEL-FIT").debug(
            "num_lags: {} / pred_stride: {} / fit_intercept: {} / horizon: {}".format(self.num_lags,
                                                                                      self.pred_stride,
                                                                                      self.fit_intercept,
                                                                                      self.horizon))
        from_idx = len(self.data_points) - 1 if from_idx is None else from_idx

        if self.prediction_failed_in_fit:
            return [None for i in range(from_idx, len(self.data_points))]

        from_idx_on_occurrences = find_index_of_last_timestamp_before(self.data_points_filtered,
                                                                      self.data_points[from_idx][0])
        if from_idx_on_occurrences < 0:
            logger("MODEL-PREDICT").warn("No block occurrence is found before the given points")
            return [None for i in range(from_idx, len(self.data_points))]

        # Find prediction on filtered data
        occurrence_timestamps = [data_point[0] for data_point in self.data_points_filtered]
        y = np.array(occurrence_timestamps)
        X = y.reshape(-1, 1).copy()

        to_scale = False
        if self.learning_method == "deep":
            X = X.astype(np.float32)
            to_scale = True
        future_points_prediction = self.pipeline.predict(X, start_idx=from_idx_on_occurrences, to_scale=to_scale)

        result = []
        # For each requested point, check if there is any close occurrence point to use for prediction
        for i in range(from_idx, len(self.data_points)):
            data_point = self.data_points[i]
            last_filtered_index = find_index_of_last_timestamp_before(self.data_points_filtered, data_point[0])
            if last_filtered_index < 0 or (data_point[0] -
                                           self.data_points_filtered[last_filtered_index][
                                               0]) > self.too_late_to_predict_time_threshold:
                result.append(None)
            else:
                data_point_last_prediction = future_points_prediction[last_filtered_index - from_idx_on_occurrences]
                prediction_age = data_point[0] - self.data_points_filtered[last_filtered_index][0]
                result.append([p - prediction_age for p in data_point_last_prediction])
        return result
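
To make the final branch concrete (illustrative numbers): prediction_age is the time elapsed since the last filtered occurrence, and each predicted value is shifted back by that amount.

    # prediction_age = 300, data_point_last_prediction = [900, 1500]
    # result entry -> [900 - 300, 1500 - 300] = [600, 1200]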
Example No. 16
def case_algorithm(algorithm,
                   data_handler,
                   luck_average_windows,
                   assessment_average_windows,
                   pool_name,
                   step_predictor=False):
    ## Scikit

    default_parameters = {
        "no_estimators": 150,
        "case_observation_size": 24 * TICKS_HOUR,
        "prediction_above_one_margin": 0,
        "round_to_n_decimal_points": 7,
        "class_weight": None,
        "max_depth": 3,
        "criterion": 'entropy',
        "min_samples_split": 10,
        "min_samples_leaf": 1,
        "bootstrap": True,
        "oob_score": True,
    }

    logger("CASES").info(
        "========================================================== base")
    case(data_handler,
         luck_average_windows=luck_average_windows,
         assessment_average_windows=assessment_average_windows,
         pool_name=pool_name,
         no_estimators=default_parameters["no_estimators"],
         predictor_class="scikit",
         case_observation_size=default_parameters["case_observation_size"],
         prediction_above_one_margin=default_parameters[
             "prediction_above_one_margin"],
         round_to_n_decimal_points=default_parameters[
             "round_to_n_decimal_points"],
         class_weight=default_parameters["class_weight"],
         max_depth=default_parameters["max_depth"],
         criterion=default_parameters["criterion"],
         min_samples_split=default_parameters["min_samples_split"],
         min_samples_leaf=default_parameters["min_samples_leaf"],
         bootstrap=default_parameters["bootstrap"],
         oob_score=default_parameters["oob_score"])
    def update_pools_db_with_occurrences(self):
        now_timestamp = get_now_timestamp()
        start_timestamp = now_timestamp - self.all_time_range
        # if there are occurrences inserted from before, continue on top of that
        latest_timestamp = block_data.get_latest_pool_block_occurrence_timestamp()
        if latest_timestamp is not None:
            start_timestamp = latest_timestamp + self.step_size
        seed(int(datetime.now().timestamp()))
        block_no = 10000
        while start_timestamp < now_timestamp:
            logger("random-data-generator").debug(
                "Block # {} processed.".format(block_no))
            new_random = randint(1, self.random_granularity)
            matching_pool = None
            for p in self.pools:
                if self.does_pool_match(p, new_random):
                    matching_pool = p
                    break
            # Update occurrence; a matching pool is assumed to exist because the pool
            # shares are expected to cover the full random_granularity range
            block_data.insert_pool_block_occurrence(start_timestamp,
                                                    matching_pool.id, block_no)
            self.pool_stats[matching_pool].add_point_and_update(
                start_timestamp)

            # Update luck tables
            self.update_luck_tables_after_one_step(start_timestamp)

            start_timestamp += self.step_size
            block_no += 1
        # Update assessments
        all_block_occurrences = block_data.get_all_block_occurrences()
        for row in all_block_occurrences:
            matching_pool = None
            for p in self.pools:
                if p.id == row[1]:
                    matching_pool = p
                    break
            self.pool_assessment_stats[matching_pool].add_point_and_update(
                row[0])
            # Update assessment tables
            self.update_luck_tables_after_one_step(row[0], mode="assessments")
Example No. 18
    def predict(self, luck_average_windows, assessment_windows, from_idx=None):
        logger("MODEL-PREDICT").debug(
            "horizon: {} / avg windows: {} / assmnt windows: {} / from_idx: {} / total_data_size: {}".format(
                self.horizon,
                str(luck_average_windows),
                str(assessment_windows),
                from_idx,
                len(self.data_points)))
        if from_idx is not None and (from_idx < 0 or from_idx >= len(self.data_points)):
            logger("MODEL-PREDICT").error("Parameter until is too large for the given data points: {}".format(from_idx))
            return
        from_idx = len(self.data_points) - 1 if from_idx is None else from_idx
        y_predictions = [[] for i in range(from_idx, len(self.data_points))]
        for wi, w in enumerate(assessment_windows):
            if w > self.horizon:
                break
            # prepare data frame for sktime package
            temporary_data_fit_file = self.prepare_ts_file(from_idx - self.case_observation_size, len(self.data_points),
                                                           self.case_observation_size, wi, w)
            X, y = load_from_tsfile_to_dataframe(temporary_data_fit_file, replace_missing_vals_with="-100")
            y_prediction = self.classifiers[wi].predict(X)
            for pred_point_index, y_point_prediction in enumerate(y_prediction):
                y_predictions[pred_point_index].append(y_point_prediction)

        logger("MODEL-PREDICT").debug("Predictions: {}".format(y_predictions))
        return y_predictions
Example No. 19
    def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=9 * 6):
        x = self.data_points
        if until is not None:
            x = self.data_points[:until]
        self.pred_stride = int(len(assessment_windows) * self.pred_stride)

        self.horizon = max_horizon
        logger("MODEL-FIT").debug(
            "num_lags: {} / pred_stride: {} / fit_intercept: {} / horizon: {}".format(self.num_lags,
                                                                                      self.pred_stride,
                                                                                      self.fit_intercept,
                                                                                      self.horizon))
        strengths = self.aggregator.aggregate_lucks(x, luck_average_windows)
        strengths_series = [s[1] for s in strengths]
        y = np.array(strengths_series)
        X = y.reshape(-1, 1).copy()

        self.pipeline = self.get_pipeline()
        if self.learning_method == "deep":
            self.pipeline.fit(X[:-1].astype(np.float32), y[:-1].astype(np.float32))
        else:
            self.pipeline.fit(X[:-1], y[:-1])
Example No. 20
 def __init__(self, timestamps, data_points, correct_decision_labels,
              no_estimators=100,
              filter_object=None,
              case_observation_size=24 * 6,
              every_m_observations_for_dimension=None,
              class_weight="balanced",
              max_depth=5,
              criterion='entropy',
              min_samples_split=2,
              min_samples_leaf=1,
              bootstrap=False,
              oob_score=False):
     """
     :param correct_decision_labels: parallel to the assessment windows; a list of True/False decision label lists, one per data point
     """
     super().__init__(data_points, no_estimators=no_estimators)
     if len(timestamps) != len(data_points) or len(timestamps) != len(correct_decision_labels):
         logger("MODEL-CREATE").error(
             "Failed to create predictor because of inconsistent length of timestamp/x/y lists")
         return
     self.timestamps = timestamps
     self.correct_decision_labels = correct_decision_labels
     self.filter = filter_object
     self.data_points_filter_results = []
     self.case_observation_size = case_observation_size
     self.every_m_observations_for_dimension = every_m_observations_for_dimension
     self.class_weight = class_weight
     self.max_depth = max_depth
     self.criterion = criterion
     self.min_samples_split = min_samples_split
     self.min_samples_leaf = min_samples_leaf
     self.bootstrap = bootstrap
     self.oob_score = oob_score
     # The following list will contain one classifier per assessment window
     self.classifiers = []
     self.horizon = 9 * 6
Example No. 21
    def run(self):
        """
        Executes the ticks one by one from the beginning until the end
        :return:
        """
        logger_object = logger("tester")

        # pre tick
        self.Algorithm.pre_ticks(self)
        # run ticks
        for tick_index in range(len(self.r.RuntimeTicks)):
            self.r.current_run_tick_index = tick_index
            tick = self.r.RuntimeTicks[tick_index]
            tick.run(self)
        # post tick
        self.Algorithm.post_ticks(self)

        # print cost and reward
        logger_object.info("Cost: {0:.3f} - Reward: {1:0.3f} - R/C%: [ {3:.3f} >> {2:.3f} << {4:.3f} ]".format(
            self.r.total_cost, self.r.total_reward, (self.r.total_reward * 100) / self.r.total_cost,
            self.r.statistics.profit_min, self.r.statistics.profit_max))
        return self.r.total_cost, self.r.total_reward, (self.r.total_reward * 100) / self.r.total_cost, \
            self.r.statistics.profit_min, self.r.statistics.profit_max
Example No. 22
 def dump_co_occurance(self, path, **kwargs):
     """ Dumps the vocabulary and co-occurance matrix """
     np.savez(path, V = self.V, r = self.r, c = self.c, x = self.x, **kwargs)
     logger(f'dumped co-occurance at "{path}"')
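
A minimal round-trip sketch for the dump/load pair above (the path and instance name are hypothetical). np.savez stores the vocabulary dict V as a 0-d object array, which is why load() needs allow_pickle=True and calls .tolist() to get the dict back:

    model.dump_co_occurance("cooc.npz")   # writes V, r, c, x into one .npz archive
    model.load("cooc.npz")                # restores them; npz['V'].tolist() yields the dict again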
Example No. 23
    def fit(self, vector_size, eta = 1e-4, epochs = 100, optimiser = 'adagrad', stop = None, tau = 1e-7, **optimiser_kwargs):
        
        if isinstance(optimiser, str):
            optimiser = get_optimiser(optimiser)
        
        logger(f'fitting with vector size = {vector_size:,d}')
        
        r, c, x    = self.r, self.c, self.x
        
        # Filter out not frequent enough co-occurances
        if self.x_min is not None:
            _r, _c, x = r[self._idx], c[self._idx], x[self._idx]
            ur        = {r : i for i, r in enumerate(np.unique(_r))}
            uc        = {c : i for i, c in enumerate(np.unique(_c))}
            r         = np.array([ur[r] for r in _r]).astype('int32')
            c         = np.array([uc[c] for c in _c]).astype('int32')
            
            # Free memory
            del _r, _c, ur, uc; gc.collect()
            
        # Compute max if not set, then cap values
        x_max      = x.max() if self.x_max is None else self.x_max
        if self.x_max is not None:
            rprint('setting x_max upper bound')
            _x     = np.minimum(x, x_max)
            
            rprint('precomputing f(X)')
            fx     = (_x / x_max) ** self.alpha
            
            # Free memory
            del _x; gc.collect()
        else:
            rprint('precomputing f(X)')
            fx     = (x / x_max) ** self.alpha
        
        rprint('precomputing log(X)')
        lx     = np.log(x)
        
        
        # Free memory
        del x; gc.collect()
        
        np.random.seed(self.random_state)
        
        shape     = len(np.unique(r)), vector_size
        
        rprint('initialising word vectors and bias vector variables')
        W1        = np.random.normal(scale = 0.5, size = shape).astype('float32')
        W2        = np.random.normal(scale = 0.5, size = shape).astype('float32')
        b1        = np.random.normal(scale = 0.5, size = shape[0]).astype('float32')
        b2        = np.random.normal(scale = 0.5, size = shape[0]).astype('float32')
        
        # As sparse matrix may have multiple entries per row, compute these entries before hand for later ease
        rprint('computing masks for optimisation')
        rmasks = {}
        cmasks = {}
        
        for d, masks in zip([r, c], [rmasks, cmasks]):
            for i, val in enumerate(d):
                if val not in masks:
                    masks[val] = []
                masks[val] += [i]
        
        # Free memory (masks is linked to cmasks so cannot delete it)
        del d; gc.collect()
        
        # Initialise optimisers (W1, W2, b)
        optim     = [optimiser(eta = eta, **optimiser_kwargs) for _ in range(3)]
        logger(f'initialised variables')
        
        u         = Update('optimising epoch', epochs)
        L         = self.L = np.ones(epochs + 1) * np.inf
        N         = fx.sum()
        lo        = np.inf
        for i in range(epochs):
            
            # Early stopping condition if over the last "stop" iterations there is a total variation of less than "tau"
            if stop is not None and i >= stop:
                if (L[i - stop: i].max() / L[i - stop: i].min() - 1) <= tau:
                    break
                
            delta           = (W1[r] * W2[c]).sum(axis = 1) + b1[r] + b2[c] - lx
            L[i]            = np.mean(fx * np.square(delta))
            
            # Store the best
            if L[i] < lo:
                best = [W1.copy(), W2.copy(), b1.copy(), b2.copy()]
                lo   = L[i]
    
            # Chain rule of loss function of the form L = fx * (delta ^ 2) w.r.t. delta (ignoring proportional constants)
            chain = (fx * delta)

            # Compute gradients to update W and b i.e. differentiate delta w.r.t W and b respectively
            #
            # Steps:
            #   • Compute adjusted gradients using optimiser
            #   • Aggregate gradients for each token (row of W)
            #   • Update parameter
            #   • Free space to reduce memory cost
            #
            # Do for W1 (optim[0]), W2 (optim[1]), b1 (optim[2]), b2 (optim[2])
            # Gradients for b1 and b2 are similar just with different aggregation masks r and c
            gw1   = optim[0](np.einsum('c,cv->cv', chain, W2[c]).astype('float32'))
            gW1   = np.zeros_like(W1)
            for j, mask in rmasks.items():
                gW1[j] += gw1[mask].mean(axis = 0)
            W1   -= gW1
            del gw1, gW1; gc.collect()
            
            gw2   = optim[1](np.einsum('c,cv->cv', chain, W1[r]).astype('float32'))
            gW2   = np.zeros_like(W2)
            for j, mask in cmasks.items():
                gW2[j] += gw2[mask].mean(axis = 0)
            W2   -= gW2
            del gw2, gW2; gc.collect()
            
            # Common gradients for b1 and b2 with different aggregations
            gb    = optim[2](chain.astype('float32'))
            
            gb1   = np.zeros_like(b1)
            for j, mask in rmasks.items():
                gb1[j] += gb[mask].mean(axis = 0)
            b1   -= gb1
            del gb1; gc.collect()
            
            gb2   = np.zeros_like(b2)
            for j, mask in cmasks.items():
                gb2[j] += gb[mask].mean(axis = 0)
            b2   -= gb2
            del chain, gb2; gc.collect()
            
            # Verbose update
            u.increment()
            u.display(loss = L[i], best = lo)
        else:
            # Enters the else statement only if the for loop completes without break
            i += 1
            
        delta  = (W1[r] * W2[c]).sum(axis = 1) + b1[r] + b2[c] - lx
        L[i]   = np.sum(fx * np.square(delta)) / N
        
        if L[i] == L.min():
            best = [W1.copy(), W2.copy(), b1.copy(), b2.copy()]
                
        self.W, self.Wc, self.b, self.bc = best
        self.L = L[:i + 1]
        logger(f'optimised over {i:,d} epochs (best loss = {min(L):,.3e}, final loss = {L[i]:,.3e})')

        return self
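
For reference, the delta and L[i] lines above amount to full-batch steps on a GloVe-style weighted least-squares objective over the non-zero co-occurrence pairs, with W1/W2 as the word and context vectors and b1/b2 as their biases:

    J = sum over (i, j) of  f(X_ij) * (w_i . w~_j + b_i + b~_j - log X_ij)^2,
    where f(X_ij) = (min(X_ij, x_max) / x_max) ** alpha

(the loop tracks the mean of the weighted squared errors per epoch; the value computed after the loop divides by fx.sum() instead).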
Example No. 24
def optimize():
    """
    Finds the best values for the prediction
    :return:
    """
    logger("optimizer").info("Tuning prediction parameters")
Example No. 25
 def compute_min_idx(self):
     if self.x_min is not None:
         self._idx = np.where(self.x_min <= self.x)[0]
         logger(f'{len(self._idx):,d} interactions above x_min = {self.x_min}')
Example No. 26
 def __init__(self):
     self.logger = logger("Algorithm-3-Hour")
Example No. 27
    # logger("main").info("Minimums\t|\t{0:.3f}\t{1:.3f}\t      {3:.3f} >> {2:.3f} << {4:.3f}".format(*minimums))
    # logger("main").info("Maximums\t|\t{0:.3f}\t{1:.3f}\t      {3:.3f} >> {2:.3f} << {4:.3f}".format(*maximums))


if __name__ == "__main__":
    luck_average_windows = prepare_average_luck_windows()
    assessment_average_windows = prepare_average_assessment_windows()
    pools = prepare_pools()
    # predictor.populate_db_with_random(pools, luck_average_windows, assessment_average_windows)
    table_names = block_data.get_list_of_table_names(which_db="pools")
    print(str(table_names))
    data_handler = predictor.create_data_handler(pools, luck_average_windows,
                                                 assessment_average_windows)
    pool_names = ["SLUSHPOOL", "BTCCOM", "VIABTC"]
    for pool_name in pool_names[:1]:
        logger("RESULTS").info("Pool: {}".format(pool_name))
        # Combination example
        # Booster
        # case_algorithm("booster", data_handler, luck_average_windows, assessment_average_windows, pool_name,
        #                step_predictor=True)
        # Linear
        case_algorithm("linear",
                       data_handler,
                       luck_average_windows,
                       assessment_average_windows,
                       pool_name,
                       step_predictor=True)
        # Linear
        # algorithm_tester.add_algorithm([
        #     StrengthPredictor(learning_method="linear", aggregator=Aggregator(method="strength"),
        #                       num_lags=10, pred_stride=1, fit_intercept=False,
Example No. 28
def case(data_handler,
         luck_average_windows,
         assessment_average_windows,
         pool_name,
         cases=None,
         method="linear",
         aggr_method="strength",
         aggr_avg_window_idx=6,
         lags=5,
         stride=0.5,
         no_estimators=50,
         too_late_to_predict_time_threshold=1.5 * TIME_10_MINUTES,
         positive_decision_occurrence_count_threshold=2,
         decision_aggregation_method="and",
         predictor_class="aggregation",
         data_filter=None,
         case_observation_size=24 * 6,
         prediction_above_one_margin=0.5,
         round_to_n_decimal_points=5,
         class_weight=None,
         max_depth=5,
         criterion='entropy',
         min_samples_split=2,
         min_samples_leaf=1,
         bootstrap=False,
         oob_score=False):
    """
    :param predictor_class: aggregation or step or scikit
    :return:
    """
    logger("==========================================").info("")
    if cases is None:
        logger("CASE").info("{}-{}-{}-{}-{}-{}-{}".format(
            predictor_class, method, aggr_method,
            (aggr_avg_window_idx if aggr_avg_window_idx is not None else ""),
            lags, stride, no_estimators))
    else:
        logger("CASE").info("-- COMBINATION --")
        for test_case in cases:
            logger("CASE").info("{}-{}-{}-{}-{}-{}-{}".format(
                predictor_class, test_case[0], test_case[1],
                (test_case[2] if test_case[2] is not None else ""),
                test_case[3], test_case[4], test_case[5]))
        logger("CASE").info("-----------------")
    sum_results = None
    no_exp_repeats = 10
    for day_offset in range(no_exp_repeats, 0, -1):
        data_handler.set_main_configs_for_input_data_preparation(
            no_days_offset=(day_offset - 1) * 3)
        x, y = predictor.export_pool_data_points_for_training(
            data_handler,
            pool_name,
            round_to_n_decimal_points=round_to_n_decimal_points)
        data_points_filter = None
        if predictor_class == "step" or predictor_class == "scikit":
            data_points_filter = data_filter
            if data_points_filter is not None:
                data_points_filter.init(x)
        algorithm_tester = AlgorithmTester(luck_average_windows,
                                           assessment_average_windows, x, y)
        # Booster
        if cases is None:
            if predictor_class == "aggregation":
                algorithm_tester.add_algorithm([
                    StrengthPredictor(learning_method=method,
                                      aggregator=Aggregator(
                                          method=aggr_method,
                                          avg_window_idx=aggr_avg_window_idx),
                                      num_lags=lags,
                                      pred_stride=stride,
                                      fit_intercept=False,
                                      success_hardness_factor=1,
                                      no_estimators=no_estimators)
                ])
            elif predictor_class == "step":
                algorithm_tester.add_algorithm([
                    StepPredictor(x,
                                  learning_method=method,
                                  aggregator=Aggregator(
                                      method=aggr_method,
                                      avg_window_idx=aggr_avg_window_idx),
                                  num_lags=lags,
                                  pred_stride=stride,
                                  fit_intercept=False,
                                  no_estimators=no_estimators,
                                  filter_object=data_points_filter,
                                  too_late_to_predict_time_threshold=
                                  too_late_to_predict_time_threshold,
                                  positive_decision_occurrence_count_threshold=
                                  positive_decision_occurrence_count_threshold)
                ])
            elif predictor_class == "scikit":
                # prepare scikit friendly x
                x_without_timestamp = []
                x_only_timestamp = []
                for data_point in x:
                    x_without_timestamp.append(data_point[1:])
                    x_only_timestamp.append(data_point[0])
                # prepare classification labeling based on assessments
                decision_labels = []
                for data_point_assessments in y:
                    decision_labels.append([
                        (assessment >= 1 + prediction_above_one_margin)
                        for assessment in data_point_assessments[1:]
                    ])
                algorithm_tester.add_algorithm([
                    SciKitPredictor(
                        x_only_timestamp,
                        x_without_timestamp,
                        decision_labels,
                        no_estimators=no_estimators,
                        filter_object=data_points_filter,
                        case_observation_size=case_observation_size,
                        every_m_observations_for_dimension=[
                            get_every_nth_value_for_average_window(avg_window)
                            for avg_window in luck_average_windows
                        ],
                        class_weight=class_weight,
                        max_depth=max_depth,
                        criterion=criterion,
                        min_samples_split=min_samples_split,
                        min_samples_leaf=min_samples_leaf,
                        bootstrap=bootstrap,
                        oob_score=oob_score)
                ])

        else:
            for test_case in cases:
                if predictor_class == "aggregation":
                    algorithm_tester.add_algorithm([
                        StrengthPredictor(learning_method=test_case[0],
                                          aggregator=Aggregator(
                                              method=test_case[1],
                                              avg_window_idx=test_case[2]),
                                          num_lags=test_case[3],
                                          pred_stride=test_case[4],
                                          fit_intercept=False,
                                          success_hardness_factor=1,
                                          no_estimators=test_case[5])
                    ])
                elif predictor_class == "step":
                    algorithm_tester.add_algorithm([
                        StepPredictor(
                            x,
                            learning_method=test_case[0],
                            aggregator=Aggregator(method=test_case[1],
                                                  avg_window_idx=test_case[2]),
                            num_lags=test_case[3],
                            pred_stride=test_case[4],
                            fit_intercept=False,
                            no_estimators=test_case[5],
                            filter_object=data_points_filter,
                            too_late_to_predict_time_threshold=test_case[6],
                            positive_decision_occurrence_count_threshold=
                            test_case[7])
                    ])
        max_horizon = 1000000
        results = algorithm_tester.test_algorithms(
            decision_aggregation_method=decision_aggregation_method,
            max_horizon=max_horizon,
            test_size=100)
        if sum_results is None:
            sum_results = results
        else:
            new_results = []
            for i, w in enumerate(assessment_average_windows):
                if w > max_horizon:
                    continue
                last_window_sum = sum_results[i]
                current_result = results[i]
                new_sum = tuple(
                    last + current
                    for last, current in zip(last_window_sum, current_result))
                new_results.append(new_sum)
            sum_results = new_results
        logger("RESULTS-AVG").debug(
            "Day offset: \t Horizon  : S/T\tP/T\tRP/T\tPS/P\tPS/T\tT")
        for i, w in enumerate(assessment_average_windows):
            if w > max_horizon:
                continue
            logger("RESULTS-AVG").debug(
                "Day offset: {} \t Horizon {} : {:.2f}\t{:.2f}\t{}\t{:.2f}\t{:.2f}\t{}"
                .format(day_offset, w, sum_results[i][0] / no_exp_repeats,
                        sum_results[i][1] / no_exp_repeats,
                        sum_results[i][2] / no_exp_repeats,
                        sum_results[i][3] / no_exp_repeats,
                        sum_results[i][4] / no_exp_repeats,
                        sum_results[i][5] / no_exp_repeats))
    logger("RESULTS-AVG").info(
        "Day offset: \t Horizon  : S/T\tP/T\tRP/T\tPS/P\tPS/T\tT")
    for i, w in enumerate(assessment_average_windows):
        if w > max_horizon:
            continue
        logger("RESULTS-AVG").info(
            "Horizon {} : {:.2f}\t{:.2f}\t{}\t{:.2f}\t{:.2f}\t{}".format(
                w, sum_results[i][0] / no_exp_repeats,
                sum_results[i][1] / no_exp_repeats,
                sum_results[i][2] / no_exp_repeats,
                sum_results[i][3] / no_exp_repeats,
                sum_results[i][4] / no_exp_repeats,
                sum_results[i][5] / no_exp_repeats))
Example No. 29


logger("prediction").info("Setting up the predictor")


def populate_db_with_random(pools, luck_average_windows,
                            assessment_average_windows):
    block_data.switch_to_temporary_copy(which_db="pools")
    data_handler = RandomPoolDataHandler(pools, luck_average_windows,
                                         assessment_average_windows)
    data_handler.initialize()
    data_handler.update_pools_db_with_occurrences()
    # data_handler.update_luck_tables()
    block_data.switch_to_main_copy(save_temporary_copy=True,
                                   remove_temporary_copy=True,
                                   which_db="pools")
    # block_data.print_all_pools_data()
Example No. 30
def generate_plots():
    """
    Generates all plots from the mine database and saves them as PNG files
    :return: None
    """
    logger("plots").info("Generating plots")