Exemplo n.º 1
0
def case_w2():
    """
    Teaching case week 2: https://yint.org/w2

    Builds a two-factor experiment (baking time and fat quantity, both in
    coded units), fits a linear model with an interaction term for the
    crispiness response, shows its summary and contour plot, and prints the
    model's prediction at T=+2, F=-1.
    """
    # Baking time: coded -1 is 80 minutes, coded +1 is 100 minutes.
    bake_time = c(-1, +1, -1, +1, lo=80, hi=100)

    # Quantity of fat: coded -1 is 20 g, coded +1 is 30 g.
    fat = c(-1, -1, +1, +1, lo=20, hi=30)

    # Measured response for the four runs.
    crispiness = c(37, 57, 49, 53, units='crispiness')

    # Collect the columns into one experiment and fit the linear model.
    runs = gather(T=bake_time, F=fat, y=crispiness)
    model_crispy = lm("y ~ T + F + T*F", runs)
    summary(model_crispy)

    # Visualize how the two factors affect the response.
    contour_plot(model_crispy)
    # Alternative views, left disabled:
    # interaction_plot(bake_time, fat, crispiness)
    # interaction_plot(fat, bake_time, crispiness)

    # Predict outside the tested range:
    #   T = +2 corresponds to 110 minutes, F = -1 to 20 grams of fat.
    y_hat = predict(model_crispy, T=+2, F=-1)
    print(f'Predicted value is: {y_hat} crispiness.')
Exemplo n.º 2
0
def case_worksheet_10B():
    """
    Worksheet 10B: exploring the concrete-strength system with a factorial.

    Code for this system: https://rsmopt.com/system/concrete-strength/

    Two factors are varied in a 2x2 factorial, a model with interaction is
    fitted in coded units, and the model is then used to predict one step
    outside the factorial in each of the two promising directions.
    """
    # C: amount of cement, in kg
    #    (presumably allowed between 1.8 and 4.2 kg -- TODO confirm)
    # W: amount of water, in L (between 0.4 and 1.1 L)

    c1 = c(2.5, 3, 2.5, 3,
           center=2.75,
           range=[2.5, 3],
           name="cement",
           units='kg')
    # The label and units previously read 'Throughput' / 'parts/hour', copied
    # from another worksheet; this factor is the amount of water in litres.
    w1 = c(0.5, 0.5, 0.9, 0.9,
           center=0.7,
           range=[0.5, 0.9],
           name='water',
           units='L')

    # Work in coded units for model building.
    C1 = c1.to_coded()
    W1 = w1.to_coded()
    y1 = c(14476, 14598, 14616, 14465, name="Strength", units="-")
    expt1 = gather(C=C1, W=W1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ C * W", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "C", "W")

    # Predict the points, using the model, and show the residuals:
    prediction_1 = predict(mod_base1, C=C1, W=W1)
    print(prediction_1)
    print(y1 - prediction_1)

    # Very nonlinear: saddle: up left, or bottom right
    # Bottom right: (C, W) = (2, -2)
    C2 = C1.extend([2])
    W2 = W1.extend([-2])

    # Predict at this point: 14794
    predict(mod_base1, C=C2, W=W2)
    c2 = C2.to_realworld()
    w2 = W2.to_realworld()

    # Actual: at c=3.25; w=0.4 (constraint): 14362. So wrong direction.
    # (The first experiment's data and model are unchanged, so they are not
    # rebuilt here.)

    # Try the other way: C, W = -2, 2
    C2 = C1.extend([-2])
    W2 = W1.extend([+2])

    # Predict at this point: 14830
    predict(mod_base1, C=C2, W=W2)
    c2 = C2.to_realworld()  # 2.25
    w2 = W2.to_realworld()  # 1.1
Exemplo n.º 3
0
def case_3B():
    """
    See video 3B in the Coursera series. R code equivalent: http://yint.org/3B

    Two factors, no extra degrees of freedom.

    Fits a model with both main effects and their interaction to the
    popped-kernel counts, prints the summary, and builds the contour plot
    with show=False.
    """
    A = c(-1, +1, -1, +1, name='Additive')
    B = c(-1, -1, +1, +1, name='Blender')
    y = c(52, 74, 62, 80, units='number of popped kernels')

    expt = gather(A=A, B=B, y=y, title='Popping corn!')

    # "y ~ A*B" is shorthand for "y ~ A + B + A*B": both main effects plus
    # the interaction. The model is fitted once; the longer spelling was
    # previously fitted as well and immediately overwritten.
    popped_corn = lm("y ~ A*B", expt)
    summary(popped_corn)
    contour_plot(popped_corn, show=False)
Exemplo n.º 4
0
def case_3C(show=False):
    """
    See video 3C in the Coursera series. R code equivalent: http://yint.org/3C

    3 factors, no extra degrees of freedom.

    Expands the two levels (-1, +1) into a grid over the factors C, T and S,
    fits a model with all interactions to the 8 responses and prints its
    summary.

    show: when truthy, the contour plot of C against T and the Pareto plot
        (called with up_to_level=2) are also produced; the value is passed
        on to both plotting calls.
    """
    # The same two-level column is used for every factor before expansion.
    two_levels = c(-1, +1)
    C, T, S = expand_grid(C=two_levels, T=two_levels, S=two_levels)

    response = c(5, 30, 6, 33, 4, 3, 5, 4)
    runs = gather(C=C, T=T, S=S, y=response, title='Water treatment')

    water = lm("y ~ C * T * S", runs)
    summary(water)

    # Plots are optional; skip them unless requested.
    if not show:
        return
    contour_plot(water, "C", "T", show=show)
    pareto_plot(water, show=show, up_to_level=2)
Exemplo n.º 5
0
def case_w4_1():
    """
    Teaching case week 4: https://yint.org/w4

    Fits a model with two factors and their interaction to the daily sales
    from 8 runs, then shows the model summary and contour plot.
    """
    # Free shipping threshold: order amount of €30 or more is coded -1,
    # order amount over €50 is coded +1.
    shipping = c(-1, +1, -1, +1, -1, +1, -1, +1, name='Free shipping amount')

    # Must the purchaser create a profile first? No is -1, yes is +1.
    profile = c(-1, -1, +1, +1, -1, -1, +1, +1, name='Create profile: No/Yes')

    # Response: daily sales amount.
    sales = c(348, 359, 327, 243, 356, 363, 296, 257)

    # Linear model using S, P and S*P to predict the response.
    runs = gather(S=shipping,
                  P=profile,
                  y=sales,
                  title='Experiment without mistake')
    model_sales = lm("y ~ S*P", runs)
    summary(model_sales)
    contour_plot(model_sales)
Exemplo n.º 6
0
def case_w4_2():
    """
    Teaching case week 4: https://yint.org/w4

    Same two-factor sales experiment as the case without a mistake, except
    that the last run was executed at the wrong free-shipping level. Fits
    the model with interaction and shows its summary and contour plot.
    """
    # Free shipping threshold: order amount of €30 or more is coded -1,
    # order amount over €50 is coded +1. A mistake was made in the last
    # experiment: the order minimum for free shipping was €60, which is
    # entered as +2 in coded units.
    shipping = c(-1, +1, -1, +1, -1, +1, -1, +2, name='Free shipping amount')

    # Must the purchaser create a profile first? No is -1, yes is +1.
    profile = c(-1, -1, +1, +1, -1, -1, +1, +1, name='Create profile: No/Yes')

    # Response: daily sales amount.
    sales = c(348, 359, 327, 243, 356, 363, 296, 220, units='€ sales')

    # Linear model using S, P and S*P to predict the response.
    runs = gather(S=shipping,
                  P=profile,
                  y=sales,
                  title='Experiment with mistake')
    model_sales_mistake = lm("y ~ S*P", runs)
    summary(model_sales_mistake)
    contour_plot(model_sales_mistake)
Exemplo n.º 7
0
def case_worksheet_10C():
    """
    Worksheet 10C: sequential optimization of profit per hour.

    Two factors are used: price (P, in $/part) and throughput (T, in
    parts/hour). The steps are:

    1. Fit an interaction model to a factorial with two centre points.
    2. Test it with one extrapolated run, then add axial points in the T
       direction and fit a model with a quadratic term in T.
    3. Extrapolate along P in steps, appending each actual outcome to the
       data and refitting.
    """
    # Price: centre at 0.75 $/part; 0.05 $/part above and below.
    p1 = c(0.75, 0.75, 0.7, 0.8, 0.7, 0.80,
           center=0.75,
           range=[0.70, 0.80],
           name="Price",
           units='$/part')
    # Throughput: centre at 325 parts/hour; 25 parts/hour above and below.
    t1 = c(325, 325, 300, 300, 350, 350,
           center=325,
           range=[300, 350],
           name='Throughput',
           units='parts/hour')
    P1 = p1.to_coded()
    T1 = t1.to_coded()
    y1 = c(7082, 7089, 6637, 6686, 7181, 7234,
           name="Response: profit per hour",
           units="$/hour")
    expt1 = gather(P=P1, T=T1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ P * T", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "P", "T")

    # Predict the points, using the model, and show the residuals:
    prediction_1 = predict(mod_base1, P=P1, T=T1)
    print(prediction_1)
    print(y1 - prediction_1)

    # We see clear non-linearity, especially when viewed in the direction of T

    # Try anyway to make a prediction, to verify it
    # P ~ 0.7 and T ~ 2.0:
    P2 = P1.extend([0.7])
    T2 = T1.extend([2.0])
    p2 = P2.to_realworld()
    t2 = T2.to_realworld()
    print(p2)  # 0.785
    print(t2)  # 375
    print(predict(mod_base1, P=P2, T=T2))

    # Should have a predicted profit of 7550, but actual is 7094.
    # Confirms our model is in a very nonlinear region in the T=Throughput
    # direction.

    # Add axial points, starting in the T direction:
    P3 = P2.extend([0, 0])
    T3 = T2.extend([1.68, -1.68])
    p3 = P3.to_realworld()
    t3 = T3.to_realworld()
    print(p3)  # 0.75, 0.75
    print(t3)  # 367, 283

    # Now build model with quadratic term in the T direction. The three new
    # responses are the extrapolated run and the two axial points.
    y3 = y1.extend([7094, 7174, 6258])
    expt3 = gather(P=P3, T=T3, y=y3, title="With axial points")
    mod_base3 = lm("y ~ P * T + I(T**2)", data=expt3)
    summary(mod_base3)
    contour_plot(mod_base3, "P", "T", xlim=(-1.5, 5))

    # Try extrapolating far out: (P, T) = (4, 1)
    P4 = P3.extend([4])
    T4 = T3.extend([1])
    p4 = P4.to_realworld()
    t4 = T4.to_realworld()
    print(p4)  # 0.95
    print(t4)  # 350

    predict(mod_base3, P=P4, T=T4)  # 7301
    # Actual: 7291  # great! Keep going
    y4 = y3.extend([7291])

    # Try extrapolating far out: (P, T) = (6, 1)
    P5 = P4.extend([6])
    T5 = T4.extend([1])
    p5 = P5.to_realworld()
    t5 = T5.to_realworld()
    print(p5)  # 1.05
    print(t5)  # 350

    predict(mod_base3, P=P5, T=T5)  # 7344
    # Actual: 7324  # great! Keep going
    y5 = y4.extend([7324])

    # Visualize model first, refitted on all responses collected so far.
    # (y5 was previously overwritten here with an undefined name `y`.)
    expt5 = gather(P=P5, T=T5, y=y5, title="With extrapolated points")
    mod_base5 = lm("y ~ P * T + I(T**2)", data=expt5)
    summary(mod_base5)
    contour_plot(mod_base5, "P", "T", xlim=(-1.5, 18))

    # Try extrapolating further out: (P, T) = (10, 1)
    P6 = P5.extend([10])
    T6 = T5.extend([1])
    p6 = P6.to_realworld()
    t6 = T6.to_realworld()
    print(p6)  # 1.25
    print(t6)  # 350

    # NOTE(review): this still predicts with mod_base3 rather than the
    # refitted mod_base5 -- confirm which model the 7431 figure refers to.
    predict(mod_base3, P=P6, T=T6)  # 7431
    # Actual: 7378  # Not matching; rebuild the model eventually.
    y6 = y5.extend([7378])
Exemplo n.º 8
0
def case_worksheet_10():
    """
    Worksheet 10: sequential optimization of profit per hour.

    Two factors are used: price (P, in $/part) and throughput (T, in
    parts/hour). The steps are:

    1. Fit an interaction model to a wide first factorial and test it with
       one extrapolated run.
    2. Rebuild around a smaller range, re-using earlier runs.
    3. Move to a model with quadratic terms once the linear models prove
       inadequate, and extrapolate along P.

    After every new run the actual outcome is appended to the responses
    before refitting.
    """
    # Price: centre at 0.75 $/part; 0.10 $/part above and below.
    p = c(0.75, 0.75, 0.65, 0.85, 0.65, 0.85,
          center=0.75,
          range=[0.65, 0.85],
          name="Price",
          units='$/part')
    # Throughput: centre at 325 parts/hour; 75 parts/hour above and below.
    t = c(325, 325, 250, 250, 400, 400,
          center=325,
          range=[250, 400],
          name='Throughput',
          units='parts/hour')
    P1 = p.to_coded()
    T1 = t.to_coded()
    y1 = c(7740, 7755, 5651, 5812, 7363, 7397,
           name="Response: profit per hour",
           units="$/hour")
    expt1 = gather(P=P1, T=T1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ P * T", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "P", "T", show=False)

    # Predict the points, using the model, and show the residuals:
    prediction_1 = predict(mod_base1, P=P1, T=T1)
    print(prediction_1)
    print(y1 - prediction_1)

    # We see clear non-linearity, especially when viewed in the direction of T

    # Try anyway to make a prediction, to verify it
    # P ~ 0.15 and T ~ 2.0:
    P2 = P1.extend([0.15])
    T2 = T1.extend([2.0])
    p2 = P2.to_realworld()
    t2 = T2.to_realworld()
    # Price first, then throughput, so each print matches its annotation.
    print(p2)  # 0.765
    print(t2)  # 475
    print(predict(mod_base1, P=P2, T=T2))

    # Should have a predicted profit of 8599, but actual is 4654.
    # Confirms our model is in a very nonlinear region in the T=Throughput
    # direction.

    # Perhaps our factorial was far too big. Make the range smaller in T.
    # Prior range = [250; 400]; now try [325; 400].

    # Second factorial: re-use some of the points
    # * Original center point becomes bottom left
    # * Original (+1, +1) becomes top right
    p3 = c(0.75, 0.85, 0.75, 0.85, 0.65, 0.85, 0.765,
           center=0.80,
           range=[0.75, 0.85],
           name="Price",
           units='$/part')
    t3 = c(325, 325, 400, 400, 400, 250, 475,
           center=(325 + 400) / 2,
           range=(325, 400),
           name='Throughput',
           units='parts/hour')

    # Responses for the rows of p3/t3, in the same order.
    y3 = c(7755, 7784, 7373, 7397, 7363, 5812, 4654,
           name="Response: profit per hour",
           units="$/hour")
    P3 = p3.to_coded()
    T3 = t3.to_coded()
    expt3 = gather(P=P3, T=T3, y=y3, title="Smaller ranges")
    mod_base3 = lm("y ~ P * T", data=expt3)
    summary(mod_base3)
    contour_plot(mod_base3, "P", "T")

    # Predict directly from least squares model, the next experiment
    # at coded values of (+2, +2) seems good
    predict(mod_base3, P=+2, T=+2)
    # Prediction is 7855

    # In RW units that corresponds to: p=0.9 and t=437.5 = 438 parts/hour
    P4 = P3.extend([+2])
    T4 = T3.extend([+2])
    print(P4.to_realworld())
    print(T4.to_realworld())

    # ACTUAL value achieved is 6325. Not a good prediction yet either.
    # Add this point to the model. This point is below any of the base
    # factorial points!
    y4 = y3.extend([6325])
    expt4 = gather(P=P4, T=T4, y=y4, title="Adding the next exploration")
    mod_base4 = lm("y ~ P * T", data=expt4)
    contour_plot(mod_base4, "P", "T")

    # It is clear that this model does not meet our needs. We need a model
    # with quadratic fitting, nonlinear terms, to estimate the nonlinear
    # surface.
    expt5 = expt4.copy()
    mod_base5 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt5)
    print(summary(mod_base5))

    # add the xlim input in a second round
    contour_plot(mod_base5, "P", "T", xlim=(-2, 4))

    # Run at (P=3, T=-0.3) for the next run
    P6 = P4.extend([+3])
    T6 = T4.extend([-0.3])
    print(P6.to_realworld())
    print(T6.to_realworld())

    # Corresponds to p = 0.95 $/part, t=351 parts/hour
    # Predict = 7939
    # Actual = 7969. Really good matching.
    # Update the model and check
    y6 = y4.extend([7969])
    expt6 = gather(P=P6,
                   T=T6,
                   y=y6,
                   title="After extrapolation, based on quadratic term")
    mod_base6 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt6)
    contour_plot(mod_base6, "P", "T", xlim=(-2, 5))

    # Extrapolate again to (P=5, T=-0.3) for the next run
    P7 = P6.extend([+5])
    T7 = T6.extend([-0.3])
    print(P7.to_realworld())
    print(T7.to_realworld())
    predict(mod_base6, P=5, T=-0.3)

    # to P = 1.05, T=351 parts/hour
    # Predict = 7982
    # Actual = 8018. Better than predicted. Perhaps surface is a steeper
    # quadratic.
    # Update the model and check. The ACTUAL outcome (8018) is appended, as
    # in the earlier steps; the predicted 7982 was previously used here.
    y7 = y6.extend([8018])
    expt7 = gather(P=P7, T=T7, y=y7, title="With 2 extrapolations")
    mod_base7 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt7)
    # NOTE(review): an upper x-limit of 148 looks out of line with the
    # earlier (-2, 4) and (-2, 5) -- confirm it is intended.
    contour_plot(mod_base7, "P", "T", xlim=(-2, 148))
Exemplo n.º 9
0
def case_worksheet_5():
    """
    We have a bioreactor system, and we are investigating four factors:
    A = feed rate                          5 g/min  or   8 g/min
    B = initial inoculant amount         300 g      or 400 g
    C = feed substrate concentration      40 g/L    or  60 g/L
    D = dissolved oxygen set-point         4 mg/L   or   5 mg/L

    The 16 experiments from a full factorial, were randomly run, and the yields
    from the bioreactor, y, are reported here in standard order:
    y = [60, 59, 63, 61, 69, 61, 94, 93, 56, 63, 70, 65, 44, 45, 78, 77].
    The yield has units of g/L.

    Without running any code or calculations, answer these questions:
    * how many 2-factor interactions are there in this full factorial: _______
    * how many 3-factor interactions are there in this full factorial: _______
    * how many 4-factor interactions are there in this full factorial: _______
    * how many terms can you fit in a full linear model, including the intercept
    * how many data points do you have to fit this model: _________
    * what will be the value of R2 if you fit this full linear model: _______
    * and the standard error will be exactly: ________


    Run your adjusted code and check that the values of A, B, C and D in the
    vector are in the order they should be.

    Use a Pareto-plot to identify the significant effects.

    Rebuild the model now without the factor, or factors which have the least
    influence on the model. Compare the existing model with this newly updated
    model. What do you notice about the coefficients?


    What would be your advice to your colleagues to improve the yield?

    Which main effects should you change?

    Will the interaction(s) of this main effect, or these main effects,
    work in your favour, or work against you?

    Make predictions of experiments in several directions of the main factors,
    to try to maximize the reactor yield.
    """

    # Coded columns for the four factors of the full factorial.
    A, B, C, D = full_factorial(4, names=['A', 'B', 'C', 'D'])

    # Attach names, units and the real-world low/high levels to each factor.
    # Factor A previously passed its upper level as `high=`; it now uses
    # `hi=` like the other three factors.
    A = supplement(A, name='Feed rate', units='g/min', lo=5, hi=8.0)
    B = supplement(B,
                   name='Initial inoculant amount',
                   units='g',
                   lo=300,
                   hi=400)
    C = supplement(C,
                   name='Feed substrate concentration',
                   units='g/L',
                   lo=40,
                   hi=60)
    D = supplement(D,
                   name='Dissolved oxygen set-point',
                   units='mg/L',
                   lo=4,
                   hi=5)

    # Yields for the 16 runs, in standard order.
    y = c(60, 59, 63, 61, 69, 61, 94, 93, 56, 63, 70, 65, 44, 45, 78, 77,
          units='g/L',
          name='Yield')

    expt = gather(A=A,
                  B=B,
                  C=C,
                  D=D,
                  y=y,
                  title='Initial experiments; full factorial')

    # Full model: all main effects and all interactions of the four factors.
    model_start = lm("y ~ A*B*C*D", expt)

    summary(model_start)
    #pareto_plot(model_start, plot_width=800)
    contour_plot(model_start, "A", "B")
    contour_plot(model_start, "B", "C")
    contour_plot(model_start, "C", "D")