Пример #1
0
def generate_bracket_csv(aug_train_set):
    """Fit per-assembly fixed/variable costs and write them to ``bracket.csv``.

    Rows of ``aug_train_set`` whose ``bracketing_pattern`` equals the
    externally defined ``brapa`` are grouped by ``tube_assembly_id``.  For
    each group a (fixed cost, variable cost, r2) triple is obtained from
    ``get_fixed_and_var_cost``; the fixed cost is then snapped to the entry
    of the externally defined ``fc_vals`` that lies within 0.1 of it, and
    the variable cost is re-estimated with ``get_var_cost_only`` using the
    snapped fixed cost.

    Args:
        aug_train_set: table-like object exposing ``bracketing_pattern``,
            ``tube_assembly_id``, ``quantity`` and ``log_cost`` columns
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        None.  The result is written to ``bracket.csv`` with the columns
        ``tube_assembly_id``, ``fixed_cost_class``, ``fixed_cost`` and
        ``var_cost``.

    Raises:
        AssertionError: if the set of assigned fixed-cost classes is not
            exactly {0, 1, 2, 3}, or if a re-estimated variable cost differs
            from the initial estimate by 0.01 or more.
    """
    df = aug_train_set[aug_train_set.bracketing_pattern == brapa]
    grouped = df.groupby("tube_assembly_id")
    taids = []
    fixed_costs = []
    var_costs = []
    # .items() runs on both Python 2 and 3; .iteritems() is Python 2 only.
    for taid, indices in grouped.groups.items():
        quantities = df.quantity[indices].values
        costs = inverse_log_transform_y(df.log_cost[indices].values)
        fixed_cost, var_cost, r2 = get_fixed_and_var_cost(quantities, costs)
        if r2 < 0.9999:
            # Single-argument form prints identically on Python 2 and 3.
            print("{} has bad r2".format(taid))
        taids.append(taid)
        fixed_costs.append(fixed_cost)
        var_costs.append(var_cost)
    fixed_costs = np.array(fixed_costs)

    # -1 marks "no class assigned yet".  The builtin ``int`` replaces the
    # ``np.int`` alias, which was removed in NumPy 1.24.
    fc_class = -1 * np.ones(len(taids), dtype=int)
    adj_fixed_costs = np.zeros(len(taids))
    for i, fc_val in enumerate(fc_vals):
        # Snap every fitted fixed cost within 0.1 of this reference value.
        indices = np.abs(fixed_costs - fc_val) < 0.1
        fc_class[indices] = i
        adj_fixed_costs[indices] = fc_val
    # Every assembly must have been classified (no -1 left) and all four
    # classes must occur.  array_equal is shape-safe, unlike ``==``.
    assert np.array_equal(np.unique(fc_class), [0, 1, 2, 3])

    adj_var_costs = np.zeros(len(taids))
    for i, taid in enumerate(taids):
        indices = grouped.groups[taid]
        quantities = df.quantity[indices].values
        costs = inverse_log_transform_y(df.log_cost[indices].values)
        fixed_cost = adj_fixed_costs[i]
        adj_var_costs[i] = get_var_cost_only(quantities, costs, fixed_cost)
        # Snapping the fixed cost should barely move the variable cost.
        assert np.abs(adj_var_costs[i] - var_costs[i]) < 0.01

    out = pd.DataFrame(
        {
            "tube_assembly_id": taids,
            "fixed_cost_class": fc_class,
            "fixed_cost": adj_fixed_costs,
            "var_cost": adj_var_costs,
        }
    )
    out.to_csv(
        "bracket.csv",
        index=False,
        columns=["tube_assembly_id", "fixed_cost_class", "fixed_cost", "var_cost"],
    )
Пример #2
0
def generate_bracket_csv(aug_train_set):
    """Derive fixed and variable costs per tube assembly; save 'bracket.csv'.

    Only rows whose ``bracketing_pattern`` equals the externally defined
    ``brapa`` are used.  They are grouped by ``tube_assembly_id`` and each
    group is passed to ``get_fixed_and_var_cost``, which yields a fixed
    cost, a variable cost and an r2 value.  Fixed costs are then replaced
    by the entry of the externally defined ``fc_vals`` within 0.1 of them,
    and variable costs are recomputed by ``get_var_cost_only`` with that
    replaced fixed cost.

    Args:
        aug_train_set: table-like object with ``bracketing_pattern``,
            ``tube_assembly_id``, ``quantity`` and ``log_cost`` columns
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        None.  Writes 'bracket.csv' with the columns 'tube_assembly_id',
        'fixed_cost_class', 'fixed_cost' and 'var_cost'.

    Raises:
        AssertionError: if the assigned classes are not exactly 0..3, or if
            a recomputed variable cost is 0.01 or more away from the first
            estimate.
    """
    df = aug_train_set[aug_train_set.bracketing_pattern == brapa]
    grouped = df.groupby('tube_assembly_id')
    taids = []
    fixed_costs = []
    var_costs = []
    # dict.items() exists on Python 2 and 3 alike; iteritems() does not.
    for taid, indices in grouped.groups.items():
        quantities = df.quantity[indices].values
        costs = inverse_log_transform_y(df.log_cost[indices].values)
        fixed_cost, var_cost, r2 = get_fixed_and_var_cost(quantities, costs)
        if r2 < 0.9999:
            # Parenthesised single argument: same output on Python 2 and 3.
            print("{} has bad r2".format(taid))
        taids.append(taid)
        fixed_costs.append(fixed_cost)
        var_costs.append(var_cost)
    fixed_costs = np.array(fixed_costs)

    # Start every assembly at class -1 ("unassigned").  ``np.int`` is gone
    # from NumPy >= 1.24, so use the builtin ``int`` it aliased.
    fc_class = -1 * np.ones(len(taids), dtype=int)
    adj_fixed_costs = np.zeros(len(taids))
    for i, fc_val in enumerate(fc_vals):
        # Boolean mask of assemblies whose fitted fixed cost is near fc_val.
        indices = np.abs(fixed_costs - fc_val) < 0.1
        fc_class[indices] = i
        adj_fixed_costs[indices] = fc_val
    # Fails if any assembly stayed at -1 or one of the four classes is
    # unused; array_equal also copes with differing lengths.
    assert np.array_equal(np.unique(fc_class), [0, 1, 2, 3])

    adj_var_costs = np.zeros(len(taids))
    for i, taid in enumerate(taids):
        indices = grouped.groups[taid]
        quantities = df.quantity[indices].values
        costs = inverse_log_transform_y(df.log_cost[indices].values)
        fixed_cost = adj_fixed_costs[i]
        adj_var_costs[i] = get_var_cost_only(quantities, costs, fixed_cost)
        # Sanity check: the adjusted fit must stay close to the free fit.
        assert np.abs(adj_var_costs[i] - var_costs[i]) < 0.01

    result = pd.DataFrame({
        'tube_assembly_id': taids,
        'fixed_cost_class': fc_class,
        'fixed_cost': adj_fixed_costs,
        'var_cost': adj_var_costs,
    })
    result.to_csv('bracket.csv', index=False, columns=[
        'tube_assembly_id', 'fixed_cost_class', 'fixed_cost', 'var_cost'])
Пример #3
0
    # NOTE: X_train is an alias of aug_train_set, not a copy, so the pop()
    # below also removes 'log_cost' from aug_train_set itself.
    X_train = aug_train_set
    y_train = X_train.pop('log_cost')
    # X_test is not used again in the visible part of this fragment.
    X_test = aug_test_set

    print "Predicting..."
    # `time` is presumably time.time (import not visible here) -- confirm.
    timer = time()
    y_train_pred = get_predictions('all', expert_names, base_get_indices,
                                   aug_train_set)
    # Root of the mean squared error between the popped 'log_cost' values
    # and the predictions; reported as RMSLE, presumably because the target
    # is already log-transformed -- confirm.
    train_rmsle = np.sqrt(mean_squared_error(y_train.values, y_train_pred))
    print "train RMSLE", train_rmsle
    y_test_pred = get_predictions('all', expert_names, base_get_indices,
                                  aug_test_set)
    timer = time() - timer
    print "    {} seconds elapsed".format(timer)

    print "Writing output..."
    timer = time()
    # Train predictions: transform back with inverse_log_transform_y and
    # number the rows via index + 1 (1-based ids, assuming the new frame
    # gets a default 0-based index -- confirm).
    df = pd.DataFrame()
    df['cost'] = inverse_log_transform_y(y_train_pred)
    df['id'] = df.index + 1
    df.to_csv("train_pred.csv", index=False, columns=['id', 'cost'])
    # Same layout for the test predictions.
    df = pd.DataFrame()
    df['cost'] = inverse_log_transform_y(y_test_pred)
    df['id'] = df.index + 1
    df.to_csv("test_pred.csv", index=False, columns=['id', 'cost'])
    timer = time() - timer
    print "    {} seconds elapsed".format(timer)

    print "Done!"
Пример #4
0
    # NOTE: X_train refers to the same object as aug_train_set, so popping
    # 'log_cost' here removes that column from aug_train_set as well.
    X_train = aug_train_set
    y_train = X_train.pop('log_cost')
    # X_test is not referenced again in the visible part of this fragment.
    X_test = aug_test_set

    print "Predicting..."
    # `time` is presumably time.time (import not visible here) -- confirm.
    timer = time()
    # Only the 'base' expert is passed to get_predictions in this variant.
    y_train_pred = get_predictions(
        'all', ['base'], base_get_indices, aug_train_set)
    # sqrt(MSE) of predictions against the popped 'log_cost' values; printed
    # as RMSLE, presumably because the target is in log space -- confirm.
    train_rmsle = np.sqrt(mean_squared_error(y_train.values, y_train_pred))
    print "train RMSLE", train_rmsle
    y_test_pred = get_predictions(
        'all', ['base'], base_get_indices, aug_test_set)
    timer = time() - timer
    print "    {} seconds elapsed".format(timer)

    print "Writing output..."
    timer = time()
    # Write train predictions after inverse_log_transform_y; 'id' is the
    # frame index + 1 (1-based, assuming a default 0-based index -- confirm).
    df = pd.DataFrame()
    df['cost'] = inverse_log_transform_y(y_train_pred)
    df['id'] = df.index + 1
    df.to_csv("train_pred.csv", index=False, columns=['id', 'cost'])
    # Test predictions are written in the same two-column layout.
    df = pd.DataFrame()
    df['cost'] = inverse_log_transform_y(y_test_pred)
    df['id'] = df.index + 1
    df.to_csv("test_pred.csv", index=False, columns=['id', 'cost'])
    timer = time() - timer
    print "    {} seconds elapsed".format(timer)

    print "Done!"