def test_limits():
    """Draw two cosine layers, one of them restricted by ``xlim``.

    The blue layer passes no ``xlim``; the red layer is limited to
    ``(10, 20)``. The arrows sit on opposite ends so the two layers can be
    told apart. The final comparison against ``'limits'`` presumably
    resolves to a baseline-image check provided by the test suite.
    """
    # Layer without explicit limits, arrow on the first end.
    unrestricted = stat_function(
        fun=np.cos,
        size=2,
        color='blue',
        arrow=arrow(ends='first'),
    )
    # Layer evaluated on an explicit x range, arrow on the last end.
    restricted = stat_function(
        fun=np.cos,
        xlim=(10, 20),
        size=2,
        color='red',
        arrow=arrow(ends='last'),
    )

    p = ggplot(df, aes('x')) + unrestricted + restricted

    assert p == 'limits'
def test_exceptions():
    """Check that ``stat_function`` misuse surfaces as ``PlotnineError``.

    Two cases are exercised: a plot with no x limits available, and a
    ``fun`` argument that is not callable. The plot is printed inside the
    ``pytest.raises`` block in both cases.
    """
    # no x limits: the plot has no x aesthetic and the layer has no xlim
    with pytest.raises(PlotnineError):
        print(ggplot(df) + stat_function(fun=np.sin))

    # fun not callable
    with pytest.raises(PlotnineError):
        print(ggplot(df, aes('x')) + stat_function(fun=1))
def test_args():
    """Exercise the ways ``args`` can be handed to ``stat_function``.

    Four layers share one helper function: no ``args``, a single value,
    a tuple of positional values, and a dict of keyword values. The final
    comparison against ``'args'`` presumably resolves to a baseline-image
    check provided by the test suite.
    """
    def fun(x, f=lambda x: x, mul=1, add=0):
        # Apply ``f`` to x, then scale and shift the result.
        scaled = f(x) * mul
        return scaled + add

    # no args, single arg, tuple of args, dict of args
    no_args = stat_function(fun=fun, size=2, color='blue')
    single_arg = stat_function(fun=fun, size=2, color='red', args=np.cos)
    tuple_args = stat_function(
        fun=fun, size=2, color='green', args=(np.cos, 2, 1))
    dict_args = stat_function(
        fun=fun, size=2, color='purple',
        args={'f': np.cos, 'mul': 3, 'add': 2})

    p = ggplot(df, aes('x')) + no_args + single_arg + tuple_args + dict_args

    assert p == 'args'
def fit_curve(self):
    """Fit a cubic polynomial to the position/weight curve and plot it.

    Loads the records with ``load_protobowl()``, builds a curve of
    ``1 - (correct answers counted so far) / (number of records)`` against
    ``relative_position`` (bucketed to thousandths), fits a degree-3
    polynomial regression to that curve, prints the fitted coefficients and
    saves a scatter-plus-fit plot to ``output/reporting/curve_score.pdf``.

    Returns:
        The fitted ``Pipeline`` (polynomial features followed by linear
        regression).
    """
    df = load_protobowl()
    # Only a literal ``True`` counts as correct; any other value
    # (e.g. a prompt) is converted to ``False``.
    df.result = df.result.apply(lambda value: value is True)

    # sorted() is stable, so records sharing a position keep their order.
    records = sorted(
        zip(df.relative_position.tolist(), df.result.tolist()),
        key=lambda record: record[0],
    )

    # Number of correct answers counted *before* each record, keyed by the
    # position truncated to thousandths. Later records falling into the
    # same bucket overwrite earlier ones.
    correct_before = {}
    running_correct = 0
    for position, is_correct in records:
        correct_before[int(position * 1000)] = running_correct
        running_correct += is_correct

    # The curve is normalised by the number of records, not by the number
    # of correct answers (the previous count(True) result was immediately
    # overwritten and never used).
    total = len(records)
    curve = [
        (bucket / 1000, 1 - count / total)
        for bucket, count in sorted(correct_before.items(),
                                    key=lambda item: item[0])
    ]

    positions, weights = zip(*curve)
    X = np.asarray(positions)
    y = np.asarray(weights)

    degree = 3
    polynomial_features = PolynomialFeatures(degree=degree,
                                             include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([
        ("polynomial_features", polynomial_features),
        ("linear_regression", linear_regression),
    ])
    # The pipeline expects a 2-D feature matrix, hence the extra axis.
    pipeline.fit(X[:, np.newaxis], y)
    print(pipeline.steps[1][1].coef_)

    def get_weight(x):
        # Predict for a single scalar position.
        return pipeline.predict(np.asarray([[x]]))[0]

    ddf = pd.DataFrame({"x": X, "y": y})
    p0 = (ggplot(ddf, aes(x="x", y="y"))
          + geom_point(size=0.3, color="blue", alpha=0.5, shape="+")
          + stat_function(fun=get_weight, color="red", size=2, alpha=0.5)
          + labs(x="Position", y="Weight"))
    p0.save("output/reporting/curve_score.pdf")
    p0.draw()
    return pipeline
def fit_curve(self):
    """Fit a cubic polynomial to the position/weight curve and plot it.

    Loads the records with ``load_protobowl()`` (only the first element of
    the returned pair is used), builds a curve of
    ``1 - (correct answers counted so far) / (number of records)`` against
    ``relative_position`` (bucketed to thousandths), fits a degree-3
    polynomial regression to that curve, prints the fitted coefficients and
    saves a scatter-plus-fit plot to ``output/reporting/curve_score.pdf``.

    Returns:
        The fitted ``Pipeline`` (polynomial features followed by linear
        regression).
    """
    # The second element of the pair is not needed here.
    df, _ = load_protobowl()
    # Only a literal ``True`` counts as correct; any other value
    # (e.g. a prompt) is converted to ``False``.
    df.result = df.result.apply(lambda value: value is True)

    # sorted() is stable, so records sharing a position keep their order.
    records = sorted(
        zip(df.relative_position.tolist(), df.result.tolist()),
        key=lambda record: record[0],
    )

    # Number of correct answers counted *before* each record, keyed by the
    # position truncated to thousandths. Later records falling into the
    # same bucket overwrite earlier ones.
    correct_before = {}
    running_correct = 0
    for position, is_correct in records:
        correct_before[int(position * 1000)] = running_correct
        running_correct += is_correct

    # The curve is normalised by the number of records, not by the number
    # of correct answers (the previous count(True) result was immediately
    # overwritten and never used).
    total = len(records)
    curve = [
        (bucket / 1000, 1 - count / total)
        for bucket, count in sorted(correct_before.items(),
                                    key=lambda item: item[0])
    ]

    positions, weights = zip(*curve)
    X = np.asarray(positions)
    y = np.asarray(weights)

    degree = 3
    polynomial_features = PolynomialFeatures(degree=degree,
                                             include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([
        ('polynomial_features', polynomial_features),
        ('linear_regression', linear_regression),
    ])
    # The pipeline expects a 2-D feature matrix, hence the extra axis.
    pipeline.fit(X[:, np.newaxis], y)
    print(pipeline.steps[1][1].coef_)

    def get_weight(x):
        # Predict for a single scalar position.
        return pipeline.predict(np.asarray([[x]]))[0]

    ddf = pd.DataFrame({'x': X, 'y': y})
    p0 = (ggplot(ddf, aes(x='x', y='y'))
          + geom_point(size=0.3, color='blue', alpha=0.5, shape='+')
          + stat_function(fun=get_weight, color='red', size=2, alpha=0.5)
          + labs(x='Position', y='Weight'))
    p0.save('output/reporting/curve_score.pdf')
    p0.draw()
    return pipeline