Example #1
def _check_intersect(s, h1, h2):
    # When two houses intersect, some numbers cannot lie outside
    # the intersection in either house, so they must be inside it.
    changed = 0
    log.append("In House {0} & {1}:", h1, h2)
    h1u = util.difference(h1, h2)  # h1 unique tiles
    h2u = util.difference(h2, h1)  # h2 unique tiles
    h1un = s.get_numbers(h1u)  # h1 unique numbers
    h2un = s.get_numbers(h2u)  # h2 unique numbers
    h1i = Sudoku.other(h1un)  # h1 intersect-only numbers
    h2i = Sudoku.other(h2un)  # h2 intersect-only numbers
    hi = util.union(h1i, h2i)  # intersect-only numbers
    if len(hi) > 0:
        @log.rollback
        def rem():
            log.indent()
            log.append("Intersection contains {0}", hi)
            log.indent()
            result = max(section_remove(s, h2u, h1i), section_remove(s, h1u, h2i))
            log.dedent(2)
            return result

        changed = rem()
    return changed
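# Examples #1, #4, #13 and #14 all use util.difference as an order-preserving
# set difference over tiles/numbers. A minimal sketch of that assumed
# semantics (the real util module may differ):
def difference(a, b):
    """Elements of a that are not in b, in a's original order."""
    exclude = set(b)
    return [x for x in a if x not in exclude]

# e.g. difference(range(1, 10), [2, 5]) -> [1, 3, 4, 6, 7, 8, 9]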
Example #2
def model_LinearRegression(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    rmse_val = []

    if train_flag == 1:
        itr = 5
        for i in range(3):
            expected = pd.DataFrame(dataset)
            expected = expected.tail(itr).head(3).reset_index(drop=True)

            train = dataset[:-itr]
            diff_values = ut.difference(dataset, order[1])

            scaler = ut.scaler_selection('lr')
            diff_values = scaler.fit_transform(
                pd.DataFrame(diff_values).values.reshape(-1, 1))

            supervised = ut.timeseries_to_supervised(diff_values, order[0])
            data = supervised.values

            clf = LinearRegression()
            param = {
                "fit_intercept": [True, False],
                "normalize": [False],
                "copy_X": [True, False]
            }
            grid = GridSearchCV(clf, param, n_jobs=1)
            model = mu.fit_model(data, grid)

            for j in range(tsize):
                X, y = data[:, 0:-1], data[:, -1]
                yhat = mu.forecast_model(model, X)

                #    inverted = list()
                #    for i in range(len(yhat)):
                #        value = ut.inverse_difference(dataset, yhat[i], len(dataset) - i)
                #        inverted.append(value)
                #    inverted = np.array(inverted)

                forecast = yhat[-1]
                if forecast < 0:
                    forecast = mu.weighted_moving_average(dataset, 1, 3)[0]

                predictions.append(forecast)
                train = np.append(train, forecast)
                diff_train = ut.difference(train, order[1])
                diff_train = scaler.fit_transform(
                    pd.DataFrame(diff_train).values.reshape(-1, 1))

                supervised = ut.timeseries_to_supervised(train, order[0])
                data = supervised.values

            predictions = predictions[1:4]
            rmse_val.append(mu.calculate_rmse('GR_LR', expected, predictions))
            itr = itr - 1

    predictions = [int(i) for i in predictions]
    return predictions, rmse_val
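# The forecasting examples call ut.difference(series, lag) and undo it with
# ut.inverse_difference(history, yhat, interval). A hedged sketch of that
# conventional pair, assuming lag >= 1 (actual signatures in ut may differ):
def ts_difference(series, interval=1):
    # value at t minus value at t - interval
    return [series[i] - series[i - interval] for i in range(interval, len(series))]

def ts_inverse_difference(history, yhat, interval=1):
    # add back the observation that differencing subtracted
    return yhat + history[-interval]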
Example #3
 def __init__(self, width, height, n_rivers=3, thickness=2, inverted=False, map_group=None, turn_limit=None, can_escape=True, max_view_distance=None):
     import util
     super().__init__(map_group, turn_limit, can_escape=can_escape, max_view_distance=max_view_distance)
     brush = [
         ['#' if inverted else '.'] * thickness
     ] * thickness
     self._fill(width, height, '.' if inverted else '#')
     for _i in range(n_rivers):
         roll = random.randint(0,3)
         if roll == 0:
             point = (0, random.randint(0, height-1))
         elif roll == 1:
             point = (width-1, random.randint(0, height-1))
         elif roll == 2:
             point = (random.randint(0, width-1), 0)
         elif roll == 3:
             point = (random.randint(0, width-1), height-1)
         center = (width/2, height/2)
         delta = util.normalize(util.difference(point, center))
         orig_delta = delta
         turn_l = util.rot_ccw_90(orig_delta)
         turn_r = util.rot_cw_90(orig_delta)
         iterations = 0
         while self._in_bounds(point, width, height):
             self._apply_brush(brush, point, width, height)
             if iterations > width / 3:
                 pert = util.normalize(
                     util.add(delta, util.scalar_mult(random.choice([turn_l, turn_r]), 0.7 + 0.3*random.random())))
                 delta = util.normalize(util.add(delta, pert))
             point = util.add(point, delta)
             iterations += 1
     self._place_stairs(width, height)
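# In this example util's helpers are 2-D vector ops rather than set ops:
# difference is componentwise subtraction, normalize scales to unit length,
# and rot_cw_90 / rot_ccw_90 rotate by 90 degrees. A sketch of the assumed
# semantics (the project's util module may differ):
import math

def vec_difference(a, b):
    return (a[0] - b[0], a[1] - b[1])

def vec_normalize(v):
    norm = math.hypot(v[0], v[1]) or 1.0  # guard the zero vector
    return (v[0] / norm, v[1] / norm)

def rot_ccw_90(v):
    return (-v[1], v[0])

def rot_cw_90(v):
    return (v[1], -v[0])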
Example #4
 def rem():
     log.indent()
     log.append("Group {0} contains {1}", subset, all_numbers)
     log.indent()
     result = section_remove(s, util.difference(unsettled_tiles, subset), all_numbers)
     log.dedent(2)
     return result
Example #5
def model_DecisionTree(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values

        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data

        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]

        dtr = DecisionTreeRegressor()
        param_tree = {
            "max_depth": [3, None],
            "min_samples_leaf": sp_randint(1, 11),
            "criterion": ["mse"],
            "splitter": ["best", "random"],
            "max_features": ["auto", "sqrt", None]
        }

        gridDT = RandomizedSearchCV(dtr, param_tree, n_jobs=1, n_iter=100)
        gridDT.fit(X, y)
        clf = DecisionTreeRegressor(
            criterion=gridDT.best_params_["criterion"],
            splitter=gridDT.best_params_["splitter"],
            max_features=gridDT.best_params_["max_features"],
            max_depth=gridDT.best_params_["max_depth"],
            min_samples_leaf=gridDT.best_params_["min_samples_leaf"])

        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = []
        for j in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[j], len(dataset) - j)
            inverted.append(value)
        inverted = np.array(inverted)

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
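# Each model turns the (differenced) series into a supervised matrix with
# ut.timeseries_to_supervised(values, lag) and then splits data[:, 0:-1] /
# data[:, -1]. A sketch of the usual shift-based construction (an assumption
# about ut's implementation):
import pandas as pd

def timeseries_to_supervised(values, lag=1):
    # lag columns first, the unshifted series as the last (target) column
    df = pd.DataFrame(values)
    columns = [df.shift(i) for i in range(lag, 0, -1)]
    columns.append(df)
    return pd.concat(columns, axis=1).fillna(0)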
Example #6
def model_ElasticNet(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values

        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data

        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        elas = ElasticNet()
        param = {
            "alpha": list(np.linspace(0.000000001, 100, 100000)),
            "l1_ratio": list(np.linspace(0.000001, 100, 1000)),
            "fit_intercept": [True, False],
            "normalize": [True, False],
            "precompute": [True, False]
        }
        random_elas = RandomizedSearchCV(elas, param, n_jobs=1, n_iter=100)
        random_elas.fit(X, y)
        clf = ElasticNet(
            alpha=random_elas.best_params_["alpha"],
            l1_ratio=random_elas.best_params_["l1_ratio"],
            fit_intercept=random_elas.best_params_["fit_intercept"],
            normalize=random_elas.best_params_["normalize"],
            precompute=random_elas.best_params_["precompute"])

        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = []
        for j in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[j], len(dataset) - j)
            inverted.append(value)
        inverted = np.array(inverted)

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
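# Every model guards against negative forecasts by falling back to
# mu.weighted_moving_average(dataset, 1, 3)[0]. A plausible reading, assuming
# a linearly weighted average of the last `window` points repeated for
# `n_steps` (the real mu helper may differ):
import numpy as np

def weighted_moving_average(series, n_steps, window):
    tail = np.asarray(series[-window:], dtype=float)
    weights = np.arange(1, len(tail) + 1)  # newer observations weigh more
    value = float(np.dot(tail, weights) / weights.sum())
    return [value] * n_steps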
Example #7
def model_SVR_Poly(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values

        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data

        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]

        mod = SVR()
        g = list(np.linspace(0.0001, 1, 1000))
        C = list(np.linspace(0.01, 10, 25))
        param = {
            "kernel": ["poly"],
            "degree": range(10, 30, 1),
            "gamma": g,
            "C": C
        }
        random_search = RandomizedSearchCV(mod, param, n_jobs=1, n_iter=100)
        random_search.fit(X, y)
        clf = SVR(kernel=random_search.best_params_["kernel"],
                  degree=random_search.best_params_["degree"],
                  gamma=random_search.best_params_["gamma"],
                  C=random_search.best_params_["C"])

        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = []
        for j in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[j], len(dataset) - j)
            inverted.append(value)
        inverted = np.array(inverted)

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
Example #8
def model_RandomForest(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values

        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data

        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        rfr = RandomForestRegressor()
        param_forest = {
            "n_estimators": range(10, 1000, 100),
            "criterion": ["mse"],
            "bootstrap": [True, False],
            "warm_start": [True, False]
        }
        gridRF = RandomizedSearchCV(rfr, param_forest, n_jobs=1, n_iter=100)
        gridRF.fit(X, y)
        # refit=True (the default) leaves gridRF.best_estimator_ trained on X, y
        clf = gridRF.best_estimator_
        yhat = mu.forecast_model(clf, X)

        inverted = []
        for j in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[j], len(dataset) - j)
            inverted.append(value)
        inverted = np.array(inverted)

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
Example #9
                                                       housing["income_cat"]):
    strat_train_set = housing.loc[train_index]
    strat_test_set = housing.loc[test_index]

# Drop income category, no longer needed
for s in (strat_train_set, strat_test_set):
    s.drop("income_cat", axis=1, inplace=True)

# labels = median house value
housing_labels = strat_train_set["median_house_value"].copy()

# Data cleanup: drop the label column from the training features
housing = strat_train_set.drop("median_house_value", axis=1)

category_attributes = ["ocean_proximity"]
numeric_attributes = util.difference(list(housing), category_attributes)

# Define our pipeline
# numeric part
numeric_pipeline = Pipeline([
    ("selector", DataFrameSelector(numeric_attributes)),
    ("imputer", SimpleImputer(strategy="median")),
    ("attribs_adder", CombinedAttributesAdder()),
    ("std_scaler", StandardScaler()),
])
# categorical part
category_pipeline = Pipeline([
    ("selector", DataFrameSelector(category_attributes)),
    # https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelBinarizer.html#sklearn.preprocessing.LabelBinarizer
    # ("label_binarizer", LabelBinarizer()),
    ("one_hot", OneHotEncoder(sparse=False)),
Example #10
def model_ML(dataset=[],
             tsize=0,
             test_shape=0,
             model=np.nan,
             key='',
             order=(0, 0, 0),
             train_flag=0):
    predictions = []
    pred_temp = []
    rmse_val = []
    parameter_values = {}
    scale_flag = 0
    if key in ('lr', 'lasso', 'ridge', 'knn', 'svmr'):
        scale_flag = 1

    if train_flag == 1:
        itr = 5
        for i in range(3):
            expected = pd.DataFrame(dataset)
            expected = expected.tail(itr).head(3)
            expected = expected.reset_index(drop=True)

            train = dataset[:-itr]

            diff_values = ut.difference(train, order[1])

            if scale_flag == 1:
                scaler = ut.scaler_selection(key)
                diff_values = scaler.fit_transform(
                    pd.DataFrame(diff_values).values.reshape(-1, 1))

            supervised = ut.timeseries_to_supervised(train, order[0])
            data = supervised.values

            RF_model = mu.fit_model(data, model)

            pred_temp = []

            for j in range(test_shape):
                X, y = data[:, 0:-1], data[:, -1]
                yhat = mu.forecast_model(RF_model, X)

                #TODO: Inverse differencing and scaling

                #                if scale_flag==1:
                #                    yhat=scaler.inverse_transform(pd.DataFrame(yhat).values.reshape(-1,1))
                #                if order[1]!=0:
                #                    inverted = list()
                #                    for i in range(len(yhat)):
                #                        value = ut.inverse_difference(dataset, yhat[i], len(dataset) - i)
                #                        inverted.append(value)
                #                    inverted = np.array(inverted)
                #                    forecast=inverted[-1]
                #                else:
                #                    forecast = yhat[-1]
                forecast = yhat[-1]
                if forecast < 0:
                    forecast = mu.weighted_moving_average(dataset, 1, 3)[0]

                pred_temp.append(forecast)

                train = np.append(train, forecast)

                diff_train = ut.difference(train, order[1])

                if scale_flag == 1:
                    scaler = ut.scaler_selection(key)
                    diff_train = scaler.fit_transform(
                        pd.DataFrame(diff_train).values.reshape(-1, 1))

                supervised = ut.timeseries_to_supervised(train, order[0])
                data = supervised.values

            pred_temp = pred_temp[1:4]
            mu.plotting(key, pred_temp, expected)
            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            rmse_val.append(mu.calculate_rmse(key, expected, pred_temp))
            itr = itr - 1

    else:

        dataset_1 = copy.deepcopy(dataset)
        diff_values = ut.difference(dataset_1, order[1])

        if scale_flag == 1:
            scaler = ut.scaler_selection(key)
            diff_values = scaler.fit_transform(
                pd.DataFrame(diff_values).values.reshape(-1, 1))

        supervised = ut.timeseries_to_supervised(diff_values, order[0])
        data = supervised.values

        RF_model = mu.fit_model(data, model)
        try:
            parameter_values = model.best_params_
        except AttributeError:
            # plain estimators expose get_params() instead of best_params_
            parameter_values = model.get_params()

        test_shape = test_shape + 2
        for i in range(test_shape):

            X, y = data[:, 0:-1], data[:, -1]

            yhat = mu.forecast_model(RF_model, X)
            #
            #            if scale_flag==1:
            #                yhat=scaler.inverse_transform(pd.DataFrame(yhat).values.reshape(-1,1))
            #            if order[1]!=0:
            #                inverted = list()
            #                for i in range(len(yhat)):
            #                    value = ut.inverse_difference(data, yhat[i], len(data) - i)
            #                    inverted.append(value)
            #                    inverted = np.array(inverted)
            #                forecast=inverted[-1]
            #            else:
            #                forecast = yhat[-1]
            forecast = yhat[-1]
            if forecast < 0:
                forecast = mu.weighted_moving_average(data, 1, 3)[0]

            predictions.append(forecast)
            dataset_1 = np.append(dataset_1, forecast)

            diff_values = ut.difference(dataset_1, order[1])

            if scale_flag == 1:
                scaler = ut.scaler_selection(key)
                diff_values = scaler.fit_transform(
                    pd.DataFrame(diff_values).values.reshape(-1, 1))

            supervised = ut.timeseries_to_supervised(diff_values, order[0])
            data = supervised.values
        predictions = predictions[2:test_shape]
    predictions = [int(i) for i in predictions]
    return predictions, rmse_val, parameter_values
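# mu.fit_model and mu.forecast_model are thin wrappers around the supervised
# matrix built above. Their bodies are not shown in these examples; a guess
# consistent with how they are called (purely an assumption):
def fit_model(data, estimator):
    X, y = data[:, 0:-1], data[:, -1]  # lag features / target (assumed layout)
    estimator.fit(X, y)
    return estimator

def forecast_model(model, X):
    # one prediction per row of lag features
    return model.predict(X)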
Example #11
def model_SVR_RBF(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values

        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data

        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]

        mod = SVR()

        # the classic libsvm-style power-of-two grids for gamma and C
        g = [2.0 ** e for e in range(-15, 4)]   # 2^-15 ... 2^3
        C = [2.0 ** e for e in range(-5, 16)]   # 2^-5 ... 2^15

        param = {'gamma': g, 'kernel': ['rbf'], 'C': C}
        random_search = RandomizedSearchCV(mod, param, n_jobs=1, n_iter=100)
        random_search.fit(X, y)
        clf = SVR(gamma=random_search.best_params_["gamma"],
                  kernel=random_search.best_params_["kernel"],
                  C=random_search.best_params_["C"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = []
        for j in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[j], len(dataset) - j)
            inverted.append(value)
        inverted = np.array(inverted)

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
Example #12
def training(details_data, datasets, forecast_period):

    facc_out = dict()
    rsq = dict()
    price = details_data['price']
    price = [float(i) for i in price]
    price = pd.Series(price).fillna(0).tolist()

    sku_list = details_data['sku']
    market = details_data['market']
    plant = details_data['plant']
    spn = details_data['spn']
    abc_data = details_data['abc_data']

    #Profiling
    data_prof = profiling.profiling_tech(datasets)

    #Clustering based on nature
    data_cluster = Cluster.clustering(data_prof)

    total_price = np.sum(price)
    sku_price = dict()
    for i, sku in enumerate(sku_list):
        sku_price[str(sku)] = price[i]

    # Market-based clustering
    # plant_cluster = Cluster.clustering_plant(sku_list, plant, market)

    # XYZ classification based on unit cost
    xyz_data = xyz.xyz_class(sku_price, total_price)
    # ABC classification based on volume
    abc_alter = abc.abc_class(datasets)

    #    trained_outputs = []
    forecast_results = []
    num = 0
    for incr, sku in enumerate(datasets):
        num += 1
        #        if sku!='1702460700':
        ##        if num!=1:
        #            continue
        prof = data_prof.iloc[incr]
        print("------------------------------------------------------------")
        print("Running SKU %d: %s..." % (num, sku))
        print("cluster :  ", data_cluster[sku])

        raw_data = copy.deepcopy(datasets[sku].T)
        output = ut.init_output(forecast_period, raw_data, prof)
        output['unit_cost'] = float(price[incr])
        output['market'] = str(market[incr])
        output['plant'] = str(plant[incr])
        if pd.isnull(abc_data[incr]):
            output['Variability_Segment'] = abc_alter[sku]
        else:
            output['Variability_Segment'] = abc_data[incr]
        output['Velocity_segment'] = xyz_data[sku]
        output['spn'] = spn[incr]

        dataset = raw_data.copy()

        dataset = dataset[:-1]
        #        dataset = pp.dateformat(dataset)
        #        dataset, interval = pp.impute_missing_dates(dataset)
        #        print(interval.days)

        if ((dataset['sales'] == 0).all()
                or all(math.isnan(x) for x in dataset['sales'])):
            #            print(dataset['sales'])
            print("All zeros/NaNs")
            forecast = [0] * forecast_period
            output['forecast_values'] = ut.assign_dates(
                forecast, 'forecast', dataset.tail(1))
            output['facc'], output['mape'], output[
                'bias'] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
            facc_out[sku] = np.mean(
                ft.calculate_validation_facc(forecast, forecast))
            forecast_results = ft.output_forecast(sku, dataset,
                                                  datasets[sku].T, output,
                                                  forecast_results)
            continue

        sku_data = dataset.astype(np.float32)
        sku_data = pp.read_from_first_sales(sku_data['sales'])

        #size--->outlier bucket size
        #sparse_size ---> number of zeros to categorize as sparse data
        #freq ---> seasonality
        interval = 30
        size = 6
        sparse_size = 10
        freq = 12
        #        size,sparse_size,freq=pp.get_bucket_size(interval)

        test_nan = pd.DataFrame(sku_data[-freq:])
        test_nan = test_nan['sales']

        # If the last year is all NaN, impute with zeros and fall back to an MA(6) forecast

        if sum(test_nan.isnull()) >= freq:
            print("Last 1 year NaN")
            sku_data = pp.data_imputation_zero(test_nan)
            sku_data = sku_data[:-5]
            expected = [0] * 5
            forecast = mu.moving_average(sku_data, forecast_period, 6)
            output['forecast_values'] = ut.assign_dates(
                forecast, 'forecast', dataset.tail(1))
            output['facc'], output['mape'], output[
                'bias'] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
            facc_out[sku] = np.mean(
                ft.calculate_validation_facc(expected, forecast))
            forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                  output, forecast_results)
            continue

        # If more than 60% of values are NaN, impute with zeros; otherwise impute with estimated values

        if sum(pd.isnull(sku_data)) > (0.6 * len(sku_data)):
            print("Nan Greater than 60%")
            sku_data = pp.data_imputation_zero(sku_data)

        else:
            print("Nan less than 60%")
            sku_data = pp.data_imputation(sku_data, freq)
            sku_data = sku_data[0]

        sku_data = pp.read_from_first_sales(sku_data)

        # If the series is too short after trimming to the first sale, fall back to weighted MA(3)

        if len(sku_data) < 20:
            try:
                print("Weighted Moving Average")
                forecast = mu.weighted_moving_average(sku_data,
                                                      forecast_period, 3)
                output['forecast_values'] = ut.assign_dates(
                    forecast, 'forecast', dataset.tail(1))
                output['facc'], output['mape'], output[
                    'bias'] = ft.calculate_forecast_accuracy(
                        raw_data.iloc[-1], forecast[0])
                facc_out[sku] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
                forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                      output, forecast_results)
            except:
                print("Less than 3")
                print(sku_data)
                forecast = mu.moving_average(sku_data, forecast_period,
                                             len(sku_data))
                output['forecast_values'] = ut.assign_dates(
                    forecast, 'forecast', dataset.tail(1))
                output['facc'], output['mape'], output[
                    'bias'] = ft.calculate_forecast_accuracy(
                        raw_data.iloc[-1], forecast[0])
                facc_out[sku] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
                forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                      output, forecast_results)

            continue

        data_copy = sku_data.copy()
        data_copy = np.array(data_copy)
        #        plt.figure()
        #        plt.plot(data_copy)

        index1, index2, sflag1, sflag2 = pp.Sesonal_detection(sku_data)
        sku_data = pp.outlier_treatment_tech(sku_data, interval, size)
        sku_data = np.array(sku_data[0])

        if sflag1 == 1:
            sku_data[index1] = data_copy[index1]
        if sflag2 == 1:
            sku_data[index2] = data_copy[index2]
#        plt.plot(sku_data)
#        plt.show()
#        continue
        sku_data = pd.DataFrame(sku_data)

        #Testing Stationarity
        d = 0
        df_test_result = tests.dickeyfullertest(
            sku_data.T.squeeze())  #pd.Series(sku_data[0])

        while df_test_result == 0:
            d += 1
            if d == 1:
                new_data = ut.difference(sku_data[0].tolist())
            else:
                new_data = ut.difference(new_data)
            df_test_result = tests.dickeyfullertest(new_data)

        sample = np.array(sku_data)
        repeat = mu.check_repetition(sample, freq, 1, len(sample))
        #Finding p and q value
        try:
            if d == 0:
                p, ps, pl = plots.acf_plot(sku_data, freq)
                q = plots.pacf_plot(sku_data, freq)
                data = sku_data
            else:

                p, ps, pl = plots.acf_plot(new_data, freq)
                q = plots.pacf_plot(new_data, freq)
                data = new_data

            if repeat in ps or repeat in pl:
                p = repeat
            else:
                p = pl[0]
            p = min(p, freq)
        except Exception:
            p = 1
            q = 1
            data = sku_data

        data = sku_data  # NOTE: overrides the differenced series selected above
        best_order = (p, d, q)
        print("BEST ORDER :", best_order)
        #TODO: Calculate tsize
        tsize = 5
        #        tsize = int(0.2*len(data))
        #        print(test)
        expected = data[-tsize:].reset_index(drop=True)
        expected = [float(i) for i in expected.values]
        #        print("Dimension: ", data.shape)
        train_6wa = sku_data[0:-tsize]
        predictions_ML, rmse_ML = train.time_series_using_ml(
            sku_data, tsize, best_order, data_cluster[sku])
        rmse_ARIMA, rmse_ES, rmse_naive, rmse_ma, predictions_ARIMA, predictions_ES, predictions_naive, predictions_ma = train.time_series_models(
            freq, sku_data, data, tsize, best_order, data_cluster[sku])
        print("Modeling done")

        rmse_TS = rmse_ARIMA.copy()
        rmse_TS.update(rmse_ES)
        rmse_TS.update(rmse_naive)
        rmse_TS.update(rmse_ma)

        predictions = predictions_ML
        predictions.update(predictions_ARIMA)
        predictions.update(predictions_ES)
        predictions.update(predictions_naive)
        predictions.update(predictions_ma)

        if data_cluster[sku] in [1, 4, 7, 10, 13, 16, 19, 22, 25]:
            rmse_Croston, predictions_Croston = mu.Croston_TSB(sku_data, tsize)
            rmse_TS.update(rmse_Croston)
            predictions.update(predictions_Croston)

        rmse_vol_ml = dict()
        for key in rmse_ML:
            std = np.std(rmse_ML[key])
            mean = np.mean(rmse_ML[key])
            rmse_vol_ml[key] = mean
            # if std == 0:
            #     rmse_vol_ml[key] = mean
            # else:
            #     rmse_vol_ml[key] = mean / std

        rmse_vol_ts = dict()
        for key in rmse_TS:
            mean = np.mean(rmse_TS[key])
            std = np.std(rmse_TS[key])
            rmse_vol_ts[key] = mean
            # if std == 0:
            #     rmse_vol_ts[key] = mean
            # else:
            #     rmse_vol_ts[key] = mean / std

        # Top 3 models (lowest mean RMSE first)
        best_models_ml = sorted(rmse_vol_ml, key=rmse_vol_ml.get)[:3]
        best_models_ts = sorted(rmse_vol_ts, key=rmse_vol_ts.get)[:3]

        #        forecasts_ml = dict()
        #        validation_ml = dict()
        bias_ml = []
        accuracy_ml = []
        for model in best_models_ml:
            #            temp = ft.model_predict(model, best_order,data, forecast_period)
            #            forecasts_ml[model] = [0 if i < 0 else int(i) for i in temp]
            #            validation_ml[model] = predictions[model]
            bias_ml.append(
                (sum(expected) - sum(predictions[model])) / len(expected))
            accuracy_ml.append(mu.calculate_facc(expected, predictions[model]))
        bias_ml = [float(format(i, '.3f')) for i in bias_ml]
        accuracy_ml = [float(format(i, '.3f')) for i in accuracy_ml]

        #        forecasts_ts = dict()
        #        validation_ts = dict()
        bias_ts = []
        accuracy_ts = []
        for model in best_models_ts:
            #            temp = ft.model_predict(model, best_order, sku_data, forecast_period,repeat)
            #            forecasts_ts[model] = [0 if i < 0 else int(i) for i in temp]
            #            validation_ts[model] = predictions[model]
            bias_ts.append(
                (sum(expected) - sum(predictions[model])) / len(expected))
            accuracy_ts.append(mu.calculate_facc(expected, predictions[model]))
        bias_ts = [float(format(i, '.3f')) for i in bias_ts]
        accuracy_ts = [float(format(i, '.3f')) for i in accuracy_ts]

        #For one ensemble
        error_ml = min(rmse_vol_ml.values())
        error_ts = min(rmse_vol_ts.values())

        best_models = [
            min(rmse_vol_ml, key=rmse_vol_ml.get),
            min(rmse_vol_ts, key=rmse_vol_ts.get)
        ]
        print("BEST MODELS :", best_models)
        print("ERRORS OF BEST MODELS :", error_ml, error_ts)
        forecast_ml, param_val_fore = ft.model_predict(best_models[0],
                                                       best_order, data,
                                                       forecast_period)

        if best_models[1] == 'Croston':
            rmse_Croston, forecast_ts = mu.Croston_TSB(sku_data,
                                                       forecast_period)
            forecast_ts = forecast_ts['Croston']
        else:
            forecast_ts, param_val = ft.model_predict(best_models[1],
                                                      best_order, sku_data,
                                                      forecast_period, repeat)

        forecast_ml = [0 if i < 0 else int(i) for i in forecast_ml]
        forecast_ts = [0 if i < 0 else int(i) for i in forecast_ts]

        weight_ts, weight_ml = ut.weight_calculation(data, best_models,
                                                     best_order)
        print("weight ts:", weight_ts)
        print("weight ml:", weight_ml)

        Vm = predictions[best_models[0]]
        Vt = predictions[best_models[1]]

        Ve = ut.method_ensemble(Vm, Vt, weight_ml, weight_ts, tsize)
        error_en = mu.calculate_rmse('Ensemble', expected, Ve)

        bias_en = []
        accuracy_en = []

        bias_en.append((sum(expected) - sum(Ve)) / len(expected))
        accuracy_en.append(mu.calculate_facc(expected, Ve))
        bias_en = [float(format(i, '.3f')) for i in bias_en]
        accuracy_en = [float(format(i, '.3f')) for i in accuracy_en]
        #Ensemble of six month naive and weighted average
        V6wa, rmse_6wa = ts.model_Naive('naive6wa',
                                        train_6wa,
                                        tsize, (0, 0, 0),
                                        0,
                                        train_flag=1)
        error_6wa = np.mean(rmse_6wa)
        forecast_6wa, param_val = ft.model_predict('naive6wa', best_order,
                                                   data, forecast_period)

        forecast_en = ut.method_ensemble(forecast_ml, forecast_ts, weight_ml,
                                         weight_ts, forecast_period)

        output['forecast_period'] = forecast_period
        output['interval'] = 'M'
        output['best_models_ml'] = best_models_ml
        output['best_models_ts'] = best_models_ts
        output['bias_ml'] = bias_ml
        output['bias_ts'] = bias_ts
        output['bias_en'] = bias_en
        output['accuracy_ml'] = accuracy_ml
        output['accuracy_ts'] = accuracy_ts
        output['accuracy_en'] = accuracy_en
        output['TS'] = op.best_model_details_ts(best_models[1], bias_ts[0],
                                                accuracy_ts[0], best_order)
        output['ML'] = op.best_model_details_ml(best_models[0], bias_ml[0],
                                                accuracy_ml[0], param_val_fore)
        output['Ensemble'] = {"bias": bias_en[0], "accuracy": accuracy_en[0]}

        error_min_model = min(error_ml, error_ts, error_en)

        print("Errors:", )
        print("ML:", error_ml)
        print("TS:", error_ts)
        print("Ensemble:", error_en)
        print("six_naive_WA", error_6wa)

        min_error = min(error_min_model, error_6wa)

        if min_error == error_ml:
            ftt = forecast_ml
        elif min_error == error_ts:
            ftt = forecast_ts
        elif min_error == error_en:
            ftt = forecast_en
        else:
            ftt = []

        if min_error == error_6wa or all(elem == ftt[0] for elem in ftt):
            print("Best forecast from six naive")
            forecast = forecast_6wa
            output['validation'] = ut.assign_dates(V6wa, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, V6wa)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_ml:
            print("Best forecast from ML")
            forecast = forecast_ml
            output['validation'] = ut.assign_dates(Vm, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Vm)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_en:
            print("Best forecast from Ensemble")
            forecast = forecast_en
            output['validation'] = ut.assign_dates(Ve, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Ve)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_ts:
            print("Best forecast from TS")
            forecast = forecast_ts
            output['validation'] = ut.assign_dates(Vt, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Vt)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        print("Forecasts:")
        print("ML:", forecast_ml)
        print("TS:", forecast_ts)
        print("Ensemble:", forecast_en)
        print("Best Forecast", forecast)

        output['forecast_values'] = ut.assign_dates(forecast, 'forecast',
                                                    dataset.tail(1))
        output['facc'], output['mape'], output[
            'bias'] = ft.calculate_forecast_accuracy(raw_data.iloc[-1].sales,
                                                     forecast[0])
        facc_out[sku] = np.mean(validation_facc)

        output['forecast_ml'] = ut.assign_dates(forecast_ml, 'forecast',
                                                dataset.tail(1))
        output['forecast_ts'] = ut.assign_dates(forecast_ts, 'forecast',
                                                dataset.tail(1))
        output['forecast_en'] = ut.assign_dates(forecast_en, 'forecast',
                                                dataset.tail(1))
        output['model_ml'] = best_models[0]
        output['model_ts'] = best_models[1]

        forecast_results = ft.output_forecast(sku, dataset, sku_data, output,
                                              forecast_results)

        ft.plot_all_forecasts(dataset, sku_data, forecast, forecast_en,
                              forecast_ml, forecast_ts, sku)

    return forecast_results, facc_out
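# ut.method_ensemble(Vm, Vt, weight_ml, weight_ts, n) blends the best ML and
# TS forecasts. The combination is presumably a pointwise weighted average; a
# minimal sketch under that assumption:
def method_ensemble(v_ml, v_ts, weight_ml, weight_ts, n):
    return [weight_ml * v_ml[i] + weight_ts * v_ts[i] for i in range(n)]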
Example #13
 def other(numbers):
     return util.difference(range(1, 10), numbers)
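# With util.difference as sketched under Example #1, other() returns the
# digits 1-9 not present in `numbers`, e.g.:
#     Sudoku.other([1, 2, 3])  ->  [4, 5, 6, 7, 8, 9]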
Example #14
 def remove_numbers(self, index, numbers):
     before = len(self[index])
     self[index] = util.difference(self[index], numbers)
     return len(self[index]) < before