Exemplo n.º 1
0
    preds[preds < 0] = 0
    for i in range(min(len(preds), number_print)):
        print("PRED: ", preds[i], "   y: ", y[i])

    print("R2: ", eva.r2(ds, preds, 'NumberOfSales'))


if __name__ == '__main__':
    TRAIN = True
    LOAD = False
    SAVE_DF = False
    name = "test"
    ds = d.read_imputed_onehot_dataset()
    ds = prepare_ds(ds)
    d.save_dataset(ds, "fully_preprocessed_ds.csv")
    ds_train = utils.get_frame_in_range(ds, 3, 2016, 12, 2017)
    ds_test = utils.get_frame_in_range(ds, 1, 2018, 2, 2018)
    y = prepare_out(ds_train)
    real_y = np.array(y)
    dy = np.zeros(y.shape)
    x = drop_useless(ds_train)
    y_test = prepare_out(ds_test)
    if SAVE_DF:
        d.save_dataset(ds_test, "dataset_to_predict_sales.csv")
    x_test = drop_useless(ds_test)

    models = []
    for i in range(number_of_model):
        if not LOAD:
            models.append(m.nonsequentialNNDropout(x.shape[1], i == 0))
        else:
Exemplo n.º 2
0
    ds = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=dts)\
        .exclude('NumberOfSales', 'Month')\
        .build()
    models = []
    x, y = ds.xtr, ds.ytr
    real_y = np.array(y)
    x_test, y_test = ds.xts, ds.yts
    for i in range(number_of_model):
        if not LOAD:
            models.append(m.single_relu(x.shape[1], i == 0))
        else:
            models.append(k.models.load_model("mod" + name + str(i) + ".h5"))
        opt = k.optimizers.adam(lr=2e-4)
        models[i].compile(optimizer=opt, loss='mean_squared_error', metrics=['mae'])
        models[i].summary()
        if TRAIN:
            models[i].fit(x=x, y=y, batch_size=20000, epochs=80, verbose=2, validation_data=(x_test, y_test))
            models[i].save("mod" + name + str(i) + ".h5")
            dy = models[i].predict(x, 500)
            print(dy.shape, y.shape)
            y = y.squeeze() - dy.squeeze()
        print(y)
        y.reshape([len(y), 1])

    print("################################################")
    print("EVALUATION ON TEST:")
    evaluate(models, number_of_model, utils.get_frame_in_range(dts, 1, 2018, 2, 2018), y_test, x_test, 1000)

    print("################################################")
    print("EVALUATION ON TRAIN:")
    evaluate(models, number_of_model, utils.get_frame_in_range(dts, 2, 2016, 12, 2017), real_y, x, 0)
Exemplo n.º 3
0
def regtree():
    """Return a new, unfitted ``tree.DecisionTreeRegressor`` with ``max_depth=9``."""
    regressor = tree.DecisionTreeRegressor(max_depth=9)
    return regressor


def gradboostreg():
    """Return a new ``ensemble.GradientBoostingRegressor`` (depth 8, 5 estimators)."""
    hyperparams = {"max_depth": 8, "n_estimators": 5}
    return ensemble.GradientBoostingRegressor(**hyperparams)


if __name__ == '__main__':
    # Script entry point: load the dataset, derive extra columns, build the
    # dataset wrapper and set up the candidate model factories.
    datas = ds.read_dataset("mean_var_on_customers_from_tain.csv")
    # 'Month' is the second '-'-separated field of 'Date'
    # (presumably dates look like YYYY-MM-DD -- TODO confirm).
    datas['Month'] = datas['Date']
    datas['Month'] = datas['Month'].apply(lambda x: x.split("-")[1])
    # Presumably expands 'Month' and 'Region' into indicator columns using the
    # given prefixes -- verify against imp.one_hot_numeric.
    datas = imp.one_hot_numeric(datas, 'Month', 'Month_')
    datas = imp.one_hot_numeric(datas, 'Region', 'Region_')
    # Each helper receives the full frame plus the sub-frame selected with
    # (3, 2016, 12, 2017) -- presumably March 2016 through December 2017,
    # i.e. the training window; TODO confirm. The second sub-frame is taken
    # from the output of the first helper, so the two calls are not
    # interchangeable.
    datas = preprocessing_utils.mean_cust_per_month_per_region(
        datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017))
    datas = preprocessing_utils.mean_cust_per_month_per_shop(
        datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017))
    # Build the dataset object with 'NumberOfCustomers' as target, explicitly
    # excluding 'NumberOfSales', the raw 'Month' column and the listed
    # weather columns.
    datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas)\
        .exclude('NumberOfSales', 'Month', 'Max_Humidity', 'Max_Sea_Level_PressurehPa', 'Max_TemperatureC',
                 'Max_VisibilityKm', 'Max_Wind_SpeedKm_h', 'Mean_Humidity', 'Mean_Sea_Level_PressurehPa',
                 'Mean_VisibilityKm', 'Mean_Wind_SpeedKm_h', 'Min_Dew_PointC',
                 'Min_Humidity', 'Min_Sea_Level_PressurehPa', 'Min_TemperatureC', 'Min_VisibilitykM')\
        .build()
    # Callables stored uncalled; each presumably yields a fresh estimator when
    # invoked. `ridge` and `lasso` are defined outside this view.
    model = [
        ridge, linear_model.LinearRegression, lasso, regtree, gradboostreg
    ]
    # NOTE(review): `final` looks like the factory for a second-stage model
    # fed by the candidates above -- confirm against the code that uses it.
    final = ridge
    n = len(model)
    mods = []
    modpreds = []
Exemplo n.º 4
0
                     axis=1)).squeeze()
        clean_row = clean_row.reshape([1, -1])
        if sc:
            return self.predict_sc(clean_row).squeeze()
        else:
            return self.predict_oth(clean_row).squeeze()


def model1():
    """Return a new, unfitted ``linear_model.Ridge`` with ``alpha=10``."""
    ridge_regressor = linear_model.Ridge(alpha=10)
    return ridge_regressor


def model2():
    """Return a new, unfitted ``tree.DecisionTreeRegressor`` with ``max_depth=9``."""
    hyperparams = {"max_depth": 9}
    return tree.DecisionTreeRegressor(**hyperparams)


# Train and test the predictor on the full dataset, then predict one row at a
# time on the frame selected with (1, 2018, 2, 2018) and print the score.
data = data_manager.read_dataset("best_for_customers.csv")
model = CustomersPredoctorSeparateShopCenters(model1, model1, 10, 10)
model.train(data)
model.test(data)
data = utils.get_frame_in_range(data, 1, 2018, 2, 2018)
# `iloc[[row]]` keeps each row as a one-row frame, which is what
# `model.predict` is handed here.
preds = np.array(
    [model.predict(data.iloc[[row]]) for row in range(len(data))]
)
data = (
    sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=data)
    .exclude('NumberOfSales', 'Month')
    .build()
)
print("FINAL R2: ", eval.evaluate(data.yts, preds))
Exemplo n.º 5
0
            'CloudCover', 'Max_Sea_Level_PressurehPa', 'WindDirDegrees',
            'Max_Dew_PointC', 'NumberOfCustomers', 'Day',
            'Mean_Sea_Level_PressurehPa', 'Min_Sea_Level_PressurehPa'
        ],
                axis=axis))
    return x


def model():
    """Return a new ``ensemble.GradientBoostingRegressor`` (depth 6, learning rate 0.1)."""
    hyperparams = {"max_depth": 6, "learning_rate": 0.1}
    return ensemble.GradientBoostingRegressor(**hyperparams)


# Load the dataset and derive 'Month' as the second '-'-separated field of
# 'Date' (presumably the month of a YYYY-MM-DD string -- TODO confirm).
datas = ds.read_dataset("mean_var_on_customers_from_tain.csv")
datas['Month'] = datas['Date']
datas['Month'] = datas['Month'].apply(lambda x: x.split("-")[1])
# Two sub-frames selected with (3, 2016, 12, 2017) and (1, 2018, 2, 2018);
# presumably month/year bounds of the train and test windows -- verify
# against utils.get_frame_in_range.
train = utils.get_frame_in_range(datas, 3, 2016, 12, 2017)
test = utils.get_frame_in_range(datas, 1, 2018, 2, 2018)
poly = PolynomialFeatures(degree=1)
# NOTE(review): this rebinds the builtin name `sum` for the rest of the
# module; a different name would avoid the shadowing.
sum = 0
# Assortment-type names handed to utils.get_frames_per_assortmenttype below.
types = [
    'AssortmentType_General', 'AssortmentType_With Fish Department',
    'AssortmentType_With Non-Food Department'
]
models = {}
for i in types:
    print("TYPE " + str(i))
    # Subsets of the train and test frames for the current assortment type.
    d_reg = utils.get_frames_per_assortmenttype(train, i)
    d_reg_t = utils.get_frames_per_assortmenttype(test, i)
    print("N_SAMPLES: ", len(d_reg) + len(d_reg_t))
    # Skip this type when its training subset has no rows.
    if len(d_reg) == 0:
        continue
Exemplo n.º 6
0
def _month_span(bm, by, em, ey):
    """Yield ``(month, year)`` pairs from ``bm/by`` through ``em/ey`` inclusive.

    Months are mapped onto one linear index, so an inverted range yields
    nothing instead of looping forever.
    """
    first = by * 12 + (bm - 1)
    last = ey * 12 + (em - 1)
    for index in range(first, last + 1):
        year, month0 = divmod(index, 12)
        yield month0 + 1, year


def _monthly_means(frame, months, target):
    """Return the mean of ``frame[target]`` for each ``(month, year)`` in *months*."""
    return [
        get_frame_in_range(frame, month, year, month, year)[target].mean()
        for month, year in months
    ]


def _grouped_monthly_frame(df, column, groups, months, labels, target):
    """Stack one monthly-mean frame per value of *column* found in *groups*."""
    parts = []
    for group in groups:
        # Filter once per group rather than once per month.
        subset = df[df[column] == group]
        parts.append(
            pd.DataFrame(data={
                "Month": labels,
                target: _monthly_means(subset, months, target),
                column: group
            }))
    return pd.concat(parts)


def monthlyplot(df,
                bm=3,
                by=2016,
                em=2,
                ey=2018,
                regions=None,
                storetype=False,
                perstore=False,
                target="NumberOfSales",
                show=True,
                save=False):
    """Plot a month-by-month aggregate of ``df[target]``.

    The plotting mode is chosen in this order of precedence:

    1. ``regions is None`` and neither flag set: bar plot of the monthly
       *sum* over all rows.
    2. ``storetype``: point plot of the monthly *mean*, one line per
       ``StoreType_*`` indicator column.
    3. ``perstore``: point plot of the monthly *mean*, one line per value
       returned by ``values_of(df, "StoreID")``.
    4. otherwise: point plot of the monthly *mean*, one line per entry of
       ``regions`` (matched against the ``"Region"`` column).

    Args:
        df: frame holding ``target`` and the columns the chosen mode needs.
        bm, by: first month and year of the range (inclusive).
        em, ey: last month and year of the range (inclusive).
        regions: iterable of region values, or ``None``.
        storetype: plot one line per store type.
        perstore: plot one line per store.
        target: name of the column to aggregate.
        show: call ``plt.show()`` when true.
        save: write the figure to ``monthly_plot.png`` when true.
    """
    months = list(_month_span(bm, by, em, ey))
    labels = [str(month) + "-" + str(year) for month, year in months]

    if regions is None and not storetype and not perstore:
        totals = [
            get_frame_in_range(df, month, year, month, year)[target].sum()
            for month, year in months
        ]
        sb.barplot(x=labels,
                   y=totals).set_title("Monthly " + target + " (All shops)")
    elif storetype:
        store_types = ("Hyper Market", "Super Market", "Standard Market",
                       "Shopping Center")
        means = {name: [] for name in store_types}
        for month, year in months:
            # One range lookup per month, shared by all four store types.
            dfframe = get_frame_in_range(df, month, year, month, year)
            for name in store_types:
                selected = dfframe[dfframe["StoreType_" + name] == 1]
                means[name].append(selected[target].mean())
        pdf = pd.concat([
            pd.DataFrame(data={
                "Month": labels,
                target: means[name],
                "Type": name
            }) for name in store_types
        ])
        sb.pointplot(x="Month", y=target, data=pdf, hue="Type").set_title(
            "Monthly " + target + " per StoreType")
    elif perstore:
        pdf = _grouped_monthly_frame(df, "StoreID", values_of(df, "StoreID"),
                                     months, labels, target)
        # The title follows `target`; it used to be hard-coded to
        # "NumberOfCustomers" whatever column was plotted.
        sb.pointplot(x="Month", y=target, data=pdf,
                     hue="StoreID").set_title("Monthly " + target +
                                              " per Store")
    else:
        pdf = _grouped_monthly_frame(df, "Region", regions, months, labels,
                                     target)
        sb.pointplot(x="Month", y=target, data=pdf,
                     hue="Region").set_title("Monthly " + target +
                                             " per Region")

    plt.legend()
    fig = plt.gcf()
    fig.set_size_inches(18, 9)

    if show:
        plt.show()
    if save:
        fig.savefig("monthly_plot.png")
Exemplo n.º 7
0
    df = ds.read_dataset("best_for_customers.csv")
    sb.lmplot(x="meancustshop",
              y="meancust_std_shop",
              data=df,
              hue="StoreType_Shopping Center")
    fig = plt.gcf()
    fig.set_size_inches(18, 9)

    if show:
        plt.show()
    if save:
        fig.savefig("meanstdscatterpershop.png")


if __name__ == '__main__':
    # Script entry point; the imports are only needed when run directly.
    import dataset.dataset as d
    import dataset.utility as utils
    import pandas as pd
    ds = d.read_imputed_onehot_dataset()
    monthlyplot(ds)
    # Print the 'NumberOfSales' total for each month, starting at 3/2016 and
    # stopping before 3/2018.
    y = 2016
    m = 3
    while y != 2018 or m != 3:
        # Presumably the rows belonging to month `m` of year `y` -- verify
        # against utils.get_frame_in_range.
        sub_ds = utils.get_frame_in_range(ds, m, y, m, y)
        expected_out = d.to_numpy(sub_ds[['NumberOfSales']]).squeeze()
        print(str(m) + "/" + str(y) + ": ", expected_out.sum())
        # Advance one month, rolling over into the next year after month 12.
        m += 1
        if m == 13:
            m = 1
            y += 1