def color_dict(d: dict, idx: int = 0):
    """
    ---------------------------------------------------------------------------
    Picks the colour located at position 'idx'.

    Parameters
    ----------
    d: dict
        Style dictionary. If it has a "color" entry, a str entry is returned
        unchanged and any other entry is indexed cyclically.
    idx: int, optional
        Position of the wanted colour. It wraps around the sequence length.

    Returns
    -------
    object
        The selected colour; an element of gen_colors() when 'd' has no
        "color" entry.
    """
    if "color" not in d:
        # Imported lazily, inside the function.
        from verticapy.plot import gen_colors

        return gen_colors()[idx % len(gen_colors())]
    chosen = d["color"]
    if isinstance(chosen, str):
        return chosen
    return chosen[idx % len(chosen)]
def regression_tree_plot(
    X: list,
    y: str,
    input_relation: str,
    cursor=None,
    max_nb_points: int = 10000,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws a scatter plot of (X[0], y) with a step line of (X[0], X[1]) on top.

    Parameters
    ----------
    X: list
        Two SQL expressions. X[0] is the horizontal axis of both layers and
        X[1] gives the heights of the step line.
        NOTE(review): X[1] is presumably the model prediction - confirm
        against the callers.
    y: str
        SQL expression used for the heights of the scatter points.
    input_relation: str
        Relation the points are sampled from.
    cursor: DBcursor, optional
        DB cursor, resolved through check_cursor.
    max_nb_points: int, optional
        Maximum number of rows fetched (rows are picked in random order).
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib scatter function.
        When "color" is a non-str sequence with more than one element, its
        second element colours the step line.

    Returns
    -------
    ax
        Matplotlib axes object
    """
    check_types(
        [
            ("X", X, [list],),
            ("y", y, [str],),
            ("input_relation", input_relation, [str],),
            ("max_nb_points", max_nb_points, [int, float],),
        ]
    )
    cursor, conn = check_cursor(cursor)[0:2]
    query = "SELECT {}, {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} IS NOT NULL ORDER BY RANDOM() LIMIT {}".format(
        X[0], X[1], y, input_relation, X[0], X[1], y, int(max_nb_points),
    )
    try:
        cursor.execute(query)
        all_points = cursor.fetchall()
    finally:
        # Close the connection even when the query fails, so it is not leaked.
        if conn:
            conn.close()
    if not (ax):
        fig, ax = plt.subplots()
        if isnotebook():
            fig.set_size_inches(8, 6)
        ax.set_axisbelow(True)
        ax.grid()
    # Sort each layer by its x value. Building lists from the sorted pairs
    # (instead of unpacking zip(*...)) also works when no row came back.
    scatter_pairs = sorted((float(row[0]), float(row[2])) for row in all_points)
    step_pairs = sorted((float(row[0]), float(row[1])) for row in all_points)
    x0 = [pair[0] for pair in scatter_pairs]
    y0 = [pair[1] for pair in scatter_pairs]
    x1 = [pair[0] for pair in step_pairs]
    y1 = [pair[1] for pair in step_pairs]
    color = "black"
    if "color" in style_kwds:
        user_color = style_kwds["color"]
        if not isinstance(user_color, str) and len(user_color) > 1:
            color = user_color[1]
    ax.step(x1, y1, color=color)
    param = {
        "marker": "o",
        "color": gen_colors()[0],
        "s": 50,
        "edgecolors": "black",
    }
    ax.scatter(x0, y0, **updated_dict(param, style_kwds))
    ax.set_xlabel(X[0])
    ax.set_ylabel(y)
    return ax
def plot_stepwise_ml(
    x: list,
    y: list,
    z: list = [],
    w: list = [],
    var: list = [],
    x_label: str = "n_features",
    y_label: str = "score",
    direction="forward",
    ax=None,
    **style_kwds
):
    """
    ---------------------------------------------------------------------------
    Draws the path followed by a stepwise selection.

    Parameters
    ----------
    x: list
        Horizontal position of each step.
    y: list
        Vertical position of each step.
    z: list, optional
        Text written next to the arrow leading to each retained step.
    w: list, optional
        One string per step. A step other than the first one is retained
        only when its string starts with "+" (direction "forward") or "-"
        (any other direction).
    var: list, optional
        var[0] and var[1] are the lists of names printed as initial and
        final variables. Lists longer than 3 are abbreviated.
    x_label: str, optional
        Label of the X axis.
    y_label: str, optional
        Label of the Y axis.
    direction: str, optional
        "forward" or "backward". It drives the sign, the text placement and,
        for "backward", the inversion of the X axis.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib scatter function.

    Returns
    -------
    ax
        Matplotlib axes object
    """
    palette = gen_colors()
    if not ax:
        fig, ax = plt.subplots()
        if isnotebook():
            fig.set_size_inches(8, 6)
        ax.grid(axis="y")
        ax.set_axisbelow(True)
    forward = direction == "forward"
    sign = "+" if forward else "-"

    # Keep the first step and every step going in the requested direction.
    kept_x, kept_y, kept_z = [], [], []
    for pos, x_val in enumerate(x):
        if pos == 0 or w[pos][0] == sign:
            kept_x.append(x_val)
            kept_y.append(y[pos])
            kept_z.append(z[pos])

    def shorten(names):
        # First two names, an ellipsis and the last name for long lists.
        if len(names) > 3:
            return names[0:2] + ["..."] + names[-1:]
        return names

    var0 = shorten(var[0])
    var1 = shorten(var[1])

    # c0 colours the intermediate steps, c1 the first and the last one.
    if "color" in style_kwds:
        user_color = style_kwds["color"]
        if isinstance(user_color, str):
            c0, c1 = user_color, palette[1]
        else:
            c0, c1 = user_color[0], user_color[1]
        del style_kwds["color"]
    else:
        c0, c1 = palette[0], palette[1]

    if forward:
        delta_ini, delta_final = 0.1, -0.15
        rot_ini, rot_final = -90, 90
        h_align = "center"
    else:
        delta_ini, delta_final = 0.35, -0.3
        rot_ini, rot_final = 90, -90
        h_align = "left"
    va_ini, va_final = "top", "bottom"

    param = {"marker": "s", "alpha": 0.5, "edgecolors": "black", "s": 400}
    ax.scatter(
        kept_x[1:-1], kept_y[1:-1], c=c0, **updated_dict(param, style_kwds)
    )
    ax.scatter(
        [kept_x[0], kept_x[-1]],
        [kept_y[0], kept_y[-1]],
        c=c1,
        **updated_dict(param, style_kwds)
    )
    ax.text(
        kept_x[0] + delta_ini,
        kept_y[0],
        "Initial Variables: {}".format("[" + ", ".join(var0) + "]"),
        rotation=rot_ini,
        verticalalignment=va_ini,
    )
    for pos in range(1, len(kept_x)):
        prev_x, prev_y = kept_x[pos - 1], kept_y[pos - 1]
        cur_x, cur_y = kept_x[pos], kept_y[pos]
        dx, dy = cur_x - prev_x, cur_y - prev_y
        ax.arrow(prev_x, prev_y, dx, dy, fc='k', ec='k', alpha=0.2)
        ax.text(
            (cur_x + prev_x) / 2,
            (cur_y + prev_y) / 2,
            sign + " " + kept_z[pos],
            rotation=rot_ini,
        )
    if direction == "backward":
        # Larger left limit than right limit: the X axis is drawn reversed.
        ax.set_xlim(
            max(x) + 0.1 * (1 + max(x) - min(x)),
            min(x) - 0.1 - 0.1 * (1 + max(x) - min(x)),
        )
    ax.text(
        kept_x[-1] + delta_final,
        kept_y[-1],
        "Final Variables: {}".format("[" + ", ".join(var1) + "]"),
        rotation=rot_final,
        verticalalignment=va_final,
        horizontalalignment=h_align,
    )
    ax.set_xticks(kept_x)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    return ax
def plot_pca_circle(
    x: list,
    y: list,
    variable_names: list = [],
    explained_variance: tuple = (None, None),
    dimensions: tuple = (1, 2),
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws a unit circle with one arrow from the origin to each (x[i], y[i]).

    Parameters
    ----------
    x: list
        Horizontal coordinate of each arrow head.
    y: list
        Vertical coordinate of each arrow head.
    variable_names: list, optional
        Text written at each arrow head. Arrows without a matching name
        (including all of them when the list is empty) are left unlabeled.
    explained_variance: tuple, optional
        Fractions appended as percentages to the two axis labels. A falsy
        entry (None, 0) adds nothing.
    dimensions: tuple, optional
        Numbers used in the "Dim<number>" axis labels.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Only "color" is read: it is used as the edge colour of the circle.

    Returns
    -------
    ax
        Matplotlib axes object
    """

    def axis_label(dimension, variance):
        # "Dim<k> " followed by the percentage when a variance is available.
        suffix = "" if not variance else "({}%)".format(round(variance * 100, 1))
        return "Dim{} {}".format(dimension, suffix)

    # Read the colour without writing into the list returned by gen_colors().
    if "color" in style_kwds:
        edge_color = style_kwds["color"]
    else:
        edge_color = gen_colors()[0]
    circle1 = plt.Circle((0, 0), 1, edgecolor=edge_color, facecolor="none")
    if not (ax):
        fig, ax = plt.subplots()
        if isnotebook():
            fig.set_size_inches(6, 6)
        ax.set_axisbelow(True)
    ax.add_patch(circle1)
    for i in range(len(x)):
        ax.arrow(
            0, 0, x[i], y[i], head_width=0.05, color="black", length_includes_head=True
        )
        # The default 'variable_names' is empty: label only what has a name
        # instead of failing with an IndexError.
        if i < len(variable_names):
            ax.text(x[i], y[i], variable_names[i])
    ax.plot([-1.1, 1.1], [0.0, 0.0], linestyle="--", color="black")
    ax.plot([0.0, 0.0], [-1.1, 1.1], linestyle="--", color="black")
    ax.set_xlabel(axis_label(dimensions[0], explained_variance[0]))
    ax.set_ylabel(axis_label(dimensions[1], explained_variance[1]))
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("left")
    ax.set_xlim(-1.1, 1.1)
    ax.set_ylim(-1.1, 1.1)
    return ax
def lift_chart(
    y_true: str,
    y_score: str,
    input_relation: (str, vDataFrame),
    cursor=None,
    pos_label: (int, float, str) = 1,
    nbins: int = 30,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws the Lift Chart.

    Parameters
    ----------
    y_true: str
        Response column.
    y_score: str
        Prediction Probability.
    input_relation: str/vDataFrame
        Relation used for the scoring. It can be a view, a table or a
        customized relation such as "(SELECT ... FROM ...) x", as long as an
        alias is given at the end of the relation.
    cursor: DBcursor, optional
        Vertica DB cursor.
    pos_label: int/float/str, optional
        Class of the response column considered as the positive one when
        computing the Lift Chart.
    nbins: int, optional
        Curve number of bins.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions.

    Returns
    -------
    tablesample
        An object containing the result. For more information, see
        utilities.tablesample.
    """
    check_types(
        [
            ("y_true", y_true, [str],),
            ("y_score", y_score, [str],),
            ("input_relation", input_relation, [str, vDataFrame],),
            ("nbins", nbins, [int, float],),
        ]
    )
    cursor, conn, input_relation = check_cursor(cursor, input_relation)
    version(cursor=cursor, condition=[8, 0, 0])
    query = (
        "SELECT LIFT_TABLE(obs, prob USING PARAMETERS num_bins = {}) OVER() "
        "FROM (SELECT (CASE WHEN {} = '{}' THEN 1 ELSE 0 END) AS obs, "
        "{}::float AS prob FROM {}) AS prediction_output"
    ).format(nbins, y_true, pos_label, y_score, input_relation)
    executeSQL(cursor, query, "Computing the Lift Table.")
    rows = cursor.fetchall()
    if conn:
        conn.close()
    decision_boundary = [row[0] for row in rows]
    positive_prediction_ratio = [row[1] for row in rows]
    lift = [row[2] for row in rows]
    decision_boundary.reverse()
    if not ax:
        fig, ax = plt.subplots()
        if isnotebook():
            fig.set_size_inches(8, 6)
    ax.set_xlabel("Cumulative Data Fraction")
    # 'value != value' is only true for NaN: NaN lifts are replaced by the
    # largest non-NaN lift (NaN counts as 0 when looking for that maximum).
    max_value = max([0 if value != value else value for value in lift])
    lift = [max_value if value != value else value for value in lift]
    lift_style = {"color": gen_colors()[0]}
    ax.plot(decision_boundary, lift, **updated_dict(lift_style, style_kwds, 0))
    capture_style = {"color": gen_colors()[1]}
    ax.plot(
        decision_boundary,
        positive_prediction_ratio,
        **updated_dict(capture_style, style_kwds, 1),
    )
    color1 = color_dict(style_kwds, 0)
    color2 = color_dict(style_kwds, 1)
    if color1 == color2:
        color2 = gen_colors()[1]
    ax.fill_between(
        decision_boundary, positive_prediction_ratio, lift, facecolor=color1, alpha=0.2
    )
    ax.fill_between(
        decision_boundary,
        [0 for _ in decision_boundary],
        positive_prediction_ratio,
        facecolor=color2,
        alpha=0.2,
    )
    ax.set_title("Lift Table")
    ax.set_axisbelow(True)
    ax.grid()
    lift_patch = mpatches.Patch(color=color1, label="Cumulative Lift")
    capture_patch = mpatches.Patch(color=color2, label="Cumulative Capture Rate")
    ax.legend(
        handles=[lift_patch, capture_patch], loc="center left", bbox_to_anchor=[1, 0.5]
    )
    ax.set_xlim(0, 1)
    ax.set_ylim(0)
    return tablesample(
        values={
            "decision_boundary": decision_boundary,
            "positive_prediction_ratio": positive_prediction_ratio,
            "lift": lift,
        },
    )
def elbow(
    input_relation: (str, vDataFrame),
    X: list = [],
    cursor=None,
    n_cluster: (tuple, list) = (1, 15),
    init: (str, list) = "kmeanspp",
    max_iter: int = 50,
    tol: float = 1e-4,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws an Elbow Curve.

    Parameters
    ----------
    input_relation: str/vDataFrame
        Relation to use to train the model.
    X: list, optional
        List of the predictor columns. If empty all the numerical vcolumns will
        be used.
    cursor: DBcursor, optional
        Vertica DB cursor.
    n_cluster: tuple/list, optional
        Tuple (start, end): every K in range(start, end) is tested, so 'end'
        itself is excluded. It can also be a customized list with the
        different K to test; the list is tested in increasing order and is
        not modified.
    init: str/list, optional
        The method to use to find the initial cluster centers.
            kmeanspp : Use the KMeans++ method to initialize the centers.
            random   : Random initial centers (see KMeans for the exact
                       behaviour).
        It can be also a list with the initial cluster centers to use.
    max_iter: int, optional
        The maximum number of iterations the algorithm performs.
    tol: float, optional
        Determines whether the algorithm has converged. The algorithm is
        considered converged after no center has moved more than a distance of
        'tol' from the previous iteration.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions.

    Returns
    -------
    tablesample
        An object containing the result. For more information, see
        utilities.tablesample.
    """
    check_types(
        [
            ("X", X, [list],),
            ("input_relation", input_relation, [str, vDataFrame],),
            # The default value is a tuple, so tuples must be accepted too.
            ("n_cluster", n_cluster, [tuple, list],),
            ("init", init, ["kmeanspp", "random"],),
            ("max_iter", max_iter, [int, float],),
            ("tol", tol, [int, float],),
        ]
    )
    cursor, conn = check_cursor(cursor, input_relation)[0:2]
    try:
        version(cursor=cursor, condition=[8, 0, 0])
        if isinstance(n_cluster, tuple):
            L = list(range(n_cluster[0], n_cluster[1]))
        else:
            # sorted() returns a new list: the caller's list is left untouched.
            L = sorted(n_cluster)
        schema = schema_relation(input_relation)[0]
        from verticapy.learn.cluster import KMeans

        all_within_cluster_SS = []
        for i in L:
            model_name = "{}.VERTICAPY_KMEANS_TMP_{}".format(
                schema, get_session(cursor)
            )
            cursor.execute("DROP MODEL IF EXISTS {}".format(model_name))
            model = KMeans(model_name, cursor, i, init, max_iter, tol,)
            model.fit(input_relation, X)
            # NOTE(review): index 3 of the model metrics is assumed to be the
            # "Between-Cluster SS / Total SS" ratio shown on the Y axis -
            # confirm against KMeans.metrics_.
            all_within_cluster_SS += [float(model.metrics_.values["value"][3])]
            model.drop()
    finally:
        # Do not leak the connection when a model fails.
        if conn:
            conn.close()
    if not (ax):
        fig, ax = plt.subplots()
        if isnotebook():
            fig.set_size_inches(8, 6)
        ax.grid(axis="y")
    param = {
        "color": gen_colors()[0],
        "marker": "o",
        "markerfacecolor": "white",
        "markersize": 7,
        "markeredgecolor": "black",
    }
    ax.plot(L, all_within_cluster_SS, **updated_dict(param, style_kwds))
    ax.set_title("Elbow Curve")
    ax.set_xlabel("Number of Clusters")
    ax.set_ylabel("Between-Cluster SS / Total SS")
    values = {"index": L, "Within-Cluster SS": all_within_cluster_SS}
    return tablesample(values=values)
def roc_curve(
    y_true: str,
    y_score: str,
    input_relation: (str, vDataFrame),
    cursor=None,
    pos_label: (int, float, str) = 1,
    nbins: int = 30,
    auc_roc: bool = False,
    best_threshold: bool = False,
    cutoff_curve: bool = False,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws the ROC Curve.

    Parameters
    ----------
    y_true: str
        Response column.
    y_score: str
        Prediction Probability.
    input_relation: str/vDataFrame
        Relation to use to do the scoring. The relation can be a view or a
        table or even a customized relation. For example, you could write:
        "(SELECT ... FROM ...) x" as long as an alias is given at the end of
        the relation.
    cursor: DBcursor, optional
        Vertica DB cursor.
    pos_label: int/float/str, optional
        To compute the ROC Curve, one of the response column class has to be
        the positive one. The parameter 'pos_label' represents this class.
    nbins: int, optional
        Curve number of bins.
    auc_roc: bool, optional
        If set to True, the function will return the ROC AUC without drawing
        the curve.
    best_threshold: bool, optional
        If set to True, the function will return the best threshold without
        drawing the curve. The best threshold is the threshold of the point
        which is the farthest from the random line. It is clamped to
        [0.001, 0.999].
    cutoff_curve: bool, optional
        If set to True, the Cutoff curve will be drawn.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions.

    Returns
    -------
    tablesample
        An object containing the result. For more information, see
        utilities.tablesample. A float is returned instead when 'auc_roc'
        or 'best_threshold' is set to True.
    """
    check_types(
        [
            ("y_true", y_true, [str],),
            ("y_score", y_score, [str],),
            ("input_relation", input_relation, [str, vDataFrame],),
            ("nbins", nbins, [int, float],),
            ("auc_roc", auc_roc, [bool],),
            ("best_threshold", best_threshold, [bool],),
            ("cutoff_curve", cutoff_curve, [bool],),
        ]
    )
    cursor, conn, input_relation = check_cursor(cursor, input_relation)
    version(cursor=cursor, condition=[8, 0, 0])
    query = "SELECT decision_boundary, false_positive_rate, true_positive_rate FROM (SELECT ROC(obs, prob USING PARAMETERS num_bins = {}) OVER() FROM (SELECT (CASE WHEN {} = '{}' THEN 1 ELSE 0 END) AS obs, {}::float AS prob FROM {}) AS prediction_output) x"
    query = query.format(nbins, y_true, pos_label, y_score, input_relation)
    executeSQL(cursor, query, "Computing the ROC Table.")
    query_result = cursor.fetchall()
    if conn:
        conn.close()
    threshold, false_positive, true_positive = (
        [item[0] for item in query_result],
        [item[1] for item in query_result],
        [item[2] for item in query_result],
    )
    # Exact integral of the piecewise linear curve: on each segment the line
    # y = a * x + b integrates to a * (x1^2 - x0^2) / 2 + b * (x1 - x0).
    auc = 0
    for i in range(len(false_positive) - 1):
        fp0, fp1 = false_positive[i], false_positive[i + 1]
        tp0, tp1 = true_positive[i], true_positive[i + 1]
        if fp1 - fp0 != 0.0:
            a = (tp1 - tp0) / (fp1 - fp0)
            b = tp1 - a * fp1
            auc = auc + a * (fp1 * fp1 - fp0 * fp0) / 2 + b * (fp1 - fp0)
    # The sum is negated before use (the points are presumably returned by
    # decreasing false positive rate) and capped at 1.
    auc = min(-auc, 1.0)
    if auc_roc:
        return auc
    if best_threshold:
        distances = [abs(tp - fp) for fp, tp in zip(false_positive, true_positive)]
        best_threshold_arg = max(zip(distances, range(len(distances))))[1]
        best = max(threshold[best_threshold_arg], 0.001)
        best = min(best, 0.999)
        return best
    if not (ax):
        fig, ax = plt.subplots()
        if isnotebook():
            fig.set_size_inches(8, 6)
    color1, color2 = color_dict(style_kwds, 0), color_dict(style_kwds, 1)
    if color1 == color2:
        color2 = gen_colors()[1]
    if cutoff_curve:
        # One style index per curve, as for the two curves of the Lift Chart,
        # so a list of user colours does not paint both curves identically.
        ax.plot(
            threshold,
            [1 - item for item in false_positive],
            label="Specificity",
            **updated_dict({"color": gen_colors()[0]}, style_kwds, 0),
        )
        ax.plot(
            threshold,
            true_positive,
            label="Sensitivity",
            **updated_dict({"color": gen_colors()[1]}, style_kwds, 1),
        )
        ax.fill_between(
            threshold,
            [1 - item for item in false_positive],
            true_positive,
            facecolor="black",
            alpha=0.02,
        )
        ax.set_xlabel("Decision Boundary")
        ax.set_title("Cutoff Curve")
        ax.legend(loc="center left", bbox_to_anchor=[1, 0.5])
    else:
        ax.set_xlabel("False Positive Rate (1-Specificity)")
        ax.set_ylabel("True Positive Rate (Sensitivity)")
        ax.plot(
            false_positive,
            true_positive,
            **updated_dict({"color": gen_colors()[0]}, style_kwds),
        )
        ax.fill_between(
            false_positive, false_positive, true_positive, facecolor=color1, alpha=0.1
        )
        ax.fill_between([0, 1], [0, 0], [0, 1], facecolor=color2, alpha=0.1)
        ax.plot([0, 1], [0, 1], color=color2)
        ax.set_title("ROC Curve")
        # Scale first, then round: rounding before the multiplication can
        # print float artefacts such as 85.92999999999999.
        ax.text(
            0.995,
            0,
            "AUC = " + str(round(auc * 100, 2)) + "%",
            verticalalignment="bottom",
            horizontalalignment="right",
            fontsize=11.5,
        )
    ax.set_ylim(0, 1)
    ax.set_xlim(0, 1)
    ax.set_axisbelow(True)
    ax.grid()
    return tablesample(
        values={
            "threshold": threshold,
            "false_positive": false_positive,
            "true_positive": true_positive,
        },
    )
def plot_acf_pacf(
    vdf: vDataFrame,
    column: str,
    ts: str,
    by: list = [],
    p: (int, list) = 15,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws the ACF and PACF Charts.

    Parameters
    ----------
    vdf: vDataFrame
        Input vDataFrame.
    column: str
        Response column.
    ts: str
        vcolumn used as timeline to order the data. It can be a numerical or
        a date like (date, datetime, timestamp...) vcolumn.
    by: list, optional
        vcolumns used in the partition.
    p: int/list, optional
        Either the maximum number of lags to consider or the list of the
        lags to include in the computation. p must be positive or a list of
        positive integers.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions.

    Returns
    -------
    tablesample
        An object containing the result. For more information, see
        utilities.tablesample.
    """
    check_types(
        [
            ("column", column, [str],),
            ("ts", ts, [str],),
            ("by", by, [list],),
            ("p", p, [int, float],),
            ("vdf", vdf, [vDataFrame,],),
        ]
    )
    # The colour is handled separately from the other style parameters.
    tmp_style = {
        key: value
        for key, value in style_kwds.items()
        if key not in ("color", "colors")
    }
    color = style_kwds["color"] if "color" in style_kwds else gen_colors()[0]
    columns_check([column, ts] + by, vdf)
    by = vdf_columns_names(by, vdf)
    column, ts = vdf_columns_names([column, ts], vdf)
    acf = vdf.acf(ts=ts, column=column, by=by, p=p, show=False)
    pacf = vdf.pacf(ts=ts, column=column, by=by, p=p, show=False)
    result = tablesample(
        {
            "index": list(range(0, len(acf.values["value"]))),
            "acf": acf.values["value"],
            "pacf": pacf.values["value"],
            "confidence": pacf.values["confidence"],
        },
    )
    fig = plt.figure(figsize=(10, 6)) if isnotebook() else plt.figure(figsize=(10, 6))
    plt.rcParams["axes.facecolor"] = "#FCFCFC"
    x = result.values["index"]
    confidence = result.values["confidence"]
    param = {
        "s": 90,
        "marker": "o",
        "facecolors": color,
        "edgecolors": "black",
        "zorder": 2,
    }

    def draw_panel(axis, heights, title):
        # Thin bars topped by markers, a zero line and the confidence band.
        axis.bar(
            x, heights, width=0.007 * len(x), color="#444444", zorder=1, linewidth=0
        )
        axis.scatter(x, heights, **updated_dict(param, tmp_style))
        axis.plot(
            [-1] + x + [x[-1] + 1],
            [0 for _ in range(len(x) + 2)],
            color=color,
            zorder=0,
        )
        axis.fill_between(x, confidence, color="#FE5016", alpha=0.1)
        axis.fill_between(
            x, [-bound for bound in confidence], color="#FE5016", alpha=0.1
        )
        axis.set_title(title)

    ax1 = fig.add_subplot(211)
    # Called while the first subplot is the only one created so far.
    plt.xlim(-1, x[-1] + 1)
    draw_panel(ax1, result.values["acf"], "Autocorrelation")
    ax2 = fig.add_subplot(212)
    draw_panel(ax2, result.values["pacf"], "Partial Autocorrelation")
    plt.show()
    return result
def regression_plot(
    X: list,
    y: str,
    input_relation: str,
    coefficients: list,
    max_nb_points: int = 50,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws the points of a relation with a linear model: a line for one
    predictor, a plane in 3D for two predictors.

    Parameters
    ----------
    X: list
        One or two predictor columns.
    y: str
        Response column.
    input_relation: str
        Relation the points are fetched from.
    coefficients: list
        Model coefficients: the intercept first, then one coefficient per
        predictor.
    max_nb_points: int, optional
        Maximum number of points fetched.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib scatter function.

    Returns
    -------
    ax
        Matplotlib axes object
    """
    check_types(
        [
            ("X", X, [list]),
            ("y", y, [str]),
            ("input_relation", input_relation, [str]),
            ("coefficients", coefficients, [list]),
            ("max_nb_points", max_nb_points, [int, float]),
        ]
    )
    param = {
        "marker": "o",
        "color": gen_colors()[0],
        "s": 50,
        "edgecolors": "black",
    }
    nb_predictors = len(X)
    if nb_predictors == 1:
        query = "SELECT {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL LIMIT {}".format(
            X[0], y, input_relation, X[0], y, int(max_nb_points)
        )
        rows = executeSQL(query, method="fetchall", print_time_sql=False)
        if not ax:
            fig, ax = plt.subplots()
            if isnotebook():
                fig.set_size_inches(8, 6)
            ax.set_axisbelow(True)
            ax.grid()
        x0 = [float(row[0]) for row in rows]
        y0 = [float(row[1]) for row in rows]
        # The line only needs its two end points.
        x_reg = [min(x0), max(x0)]
        y_reg = [coefficients[0] + coefficients[1] * value for value in x_reg]
        ax.plot(x_reg, y_reg, alpha=1, color="black")
        ax.scatter(x0, y0, **updated_dict(param, style_kwds, 0))
        ax.set_xlabel(X[0])
        ax.set_ylabel(y)
    elif nb_predictors == 2:
        query = "(SELECT {}, {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} IS NOT NULL LIMIT {})".format(
            X[0], X[1], y, input_relation, X[0], X[1], y, int(max_nb_points)
        )
        rows = executeSQL(query, method="fetchall", print_time_sql=False)
        x0 = [float(row[0]) for row in rows]
        y0 = [float(row[1]) for row in rows]
        z0 = [float(row[2]) for row in rows]
        min_reg_x, max_reg_x = min(x0), max(x0)
        step_x = (max_reg_x - min_reg_x) / 40.0
        min_reg_y, max_reg_y = min(y0), max(y0)
        step_y = (max_reg_y - min_reg_y) / 40.0
        # Grid exceeding the data range by 5 steps on each side; a constant
        # predictor (step of 0) falls back to a single value.
        if step_x > 0:
            X_reg = arange(min_reg_x - 5 * step_x, max_reg_x + 5 * step_x, step_x)
        else:
            X_reg = [max_reg_x]
        if step_y > 0:
            Y_reg = arange(min_reg_y - 5 * step_y, max_reg_y + 5 * step_y, step_y)
        else:
            Y_reg = [max_reg_y]
        X_reg, Y_reg = np.meshgrid(X_reg, Y_reg)
        Z_reg = coefficients[0] + coefficients[1] * X_reg + coefficients[2] * Y_reg
        if not ax:
            if isnotebook():
                plt.figure(figsize=(8, 6))
            ax = plt.axes(projection="3d")
        ax.plot_surface(
            X_reg, Y_reg, Z_reg, rstride=1, cstride=1, alpha=0.5, color="gray"
        )
        ax.scatter(x0, y0, z0, **updated_dict(param, style_kwds, 0))
        ax.set_xlabel(X[0])
        ax.set_ylabel(X[1])
        ax.set_zlabel(y + " = f(" + X[0] + ", " + X[1] + ")")
    else:
        raise ParameterError("The number of predictors is too big.")
    return ax
def plot_var(
    x: list,
    y: list,
    variable_names: list = [],
    explained_variance: tuple = (None, None),
    dimensions: tuple = (1, 2),
    bar_name: str = "",
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws labeled points (x[i], y[i]) with dashed lines through the origin.

    Parameters
    ----------
    x: list
        Horizontal coordinate of each point.
    y: list
        Vertical coordinate of each point.
    variable_names: list, optional
        Text written slightly above each point. Points without a matching
        name (including all of them when the list is empty) are left
        unlabeled.
    explained_variance: tuple, optional
        Fractions appended as percentages to the two axis labels. A falsy
        entry (None, 0) adds nothing.
    dimensions: tuple, optional
        Numbers used in the "Dim<number>" axis labels.
    bar_name: str, optional
        Label of the colorbar, which is only drawn when 'c' is part of the
        style parameters.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib scatter function.

    Returns
    -------
    ax
        Matplotlib axes object
    """

    def axis_label(dimension, variance):
        # "Dim<k> " followed by the percentage when a variance is available.
        suffix = "" if not variance else "({}%)".format(round(variance * 100, 1))
        return "Dim{} {}".format(dimension, suffix)

    # Read the colour without writing into the list returned by gen_colors().
    if "color" in style_kwds:
        default_color = style_kwds["color"]
    else:
        default_color = gen_colors()[0]
    if not (ax):
        fig, ax = plt.subplots()
        if isnotebook():
            fig.set_size_inches(6, 6)
        ax.set_axisbelow(True)
        ax.grid()
    else:
        # No figure handle for a user supplied axes: the colorbar then goes
        # through the pyplot module.
        fig = plt
    min_x, max_x = min(x), max(x)
    min_y, max_y = min(y), max(y)
    delta_x = (max_x - min_x) * 0.04
    delta_y = (max_y - min_y) * 0.04
    # zip stops at the shortest input: the default empty 'variable_names'
    # draws no label instead of failing with an IndexError.
    for x_i, y_i, name in zip(x, y, variable_names):
        ax.text(x_i, y_i + delta_y, name, horizontalalignment="center")
    param = {"marker": "^", "s": 100, "edgecolors": "black"}
    if "c" not in style_kwds:
        param["color"] = default_color
    img = ax.scatter(x, y, **updated_dict(param, style_kwds, 0))
    x_limits = [min_x - 5 * delta_x, max_x + 5 * delta_x]
    y_limits = [min_y - 5 * delta_y, max_y + 5 * delta_y]
    ax.plot(x_limits, [0.0, 0.0], linestyle="--", color="black")
    ax.plot([0.0, 0.0], y_limits, linestyle="--", color="black")
    ax.set_xlim(x_limits[0], x_limits[1])
    ax.set_ylim(y_limits[0], y_limits[1])
    ax.set_xlabel(axis_label(dimensions[0], explained_variance[0]))
    ax.set_ylabel(axis_label(dimensions[1], explained_variance[1]))
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("left")
    if "c" in style_kwds:
        fig.colorbar(img).set_label(bar_name)
    return ax
def logit_plot(
    X: list,
    y: str,
    input_relation: str,
    coefficients: list,
    max_nb_points=50,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws a logistic model (curve for one predictor, surface in 3D for two
    predictors) with sampled points of both classes placed on it.

    Parameters
    ----------
    X: list
        One or two predictor columns.
    y: str
        Response column. Rows are fetched for the values 0 and 1.
    input_relation: str
        Relation the points are fetched from.
    coefficients: list
        Model coefficients: the intercept first, then one coefficient per
        predictor.
    max_nb_points: int, optional
        Maximum number of points fetched; each class gets at most half of it.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib scatter function.

    Returns
    -------
    ax
        Matplotlib axes object
    """
    check_types(
        [
            ("X", X, [list]),
            ("y", y, [str]),
            ("input_relation", input_relation, [str]),
            ("coefficients", coefficients, [list]),
            ("max_nb_points", max_nb_points, [int, float]),
        ]
    )
    param0 = {
        "marker": "o",
        "s": 50,
        "color": gen_colors()[0],
        "edgecolors": "black",
        "alpha": 0.8,
    }
    param1 = {
        "marker": "o",
        "s": 50,
        "color": gen_colors()[1],
        "edgecolors": "black",
    }

    def logit(x):
        # Logistic function. math.exp is only called on non-positive values:
        # 1 / (1 + math.exp(-x)) raises OverflowError for very negative x.
        if x >= 0:
            return 1 / (1 + math.exp(-x))
        exp_x = math.exp(x)
        return exp_x / (1 + exp_x)

    if len(X) == 1:
        query = "(SELECT {}, {} FROM {} WHERE {} IS NOT NULL AND {} = 0 LIMIT {})".format(
            X[0], y, input_relation, X[0], y, int(max_nb_points / 2)
        )
        query += " UNION ALL (SELECT {}, {} FROM {} WHERE {} IS NOT NULL AND {} = 1 LIMIT {})".format(
            X[0], y, input_relation, X[0], y, int(max_nb_points / 2)
        )
        all_points = executeSQL(query, method="fetchall", print_time_sql=False)
        if not (ax):
            fig, ax = plt.subplots()
            if isnotebook():
                fig.set_size_inches(8, 6)
            ax.set_axisbelow(True)
            ax.grid()
        # x0: predictor values of class 0, x1: of the other class.
        x0, x1 = [], []
        for item in all_points:
            if item[1] == 0:
                x0.append(float(item[0]))
            else:
                x1.append(float(item[0]))
        min_logit, max_logit = min(x0 + x1), max(x0 + x1)
        step = (max_logit - min_logit) / 40.0
        # Curve exceeding the data range by 5 steps on each side; a constant
        # predictor (step of 0) falls back to a single value.
        x_logit = (
            arange(min_logit - 5 * step, max_logit + 5 * step, step)
            if (step > 0)
            else [max_logit]
        )
        y_logit = [
            logit(coefficients[0] + coefficients[1] * item) for item in x_logit
        ]
        ax.plot(x_logit, y_logit, alpha=1, color="black")
        all_scatter = [
            ax.scatter(
                x0,
                [logit(coefficients[0] + coefficients[1] * item) for item in x0],
                **updated_dict(param1, style_kwds, 1),
            )
        ]
        all_scatter += [
            ax.scatter(
                x1,
                [logit(coefficients[0] + coefficients[1] * item) for item in x1],
                **updated_dict(param0, style_kwds, 0),
            )
        ]
        ax.set_xlabel(X[0])
        ax.set_ylabel(y)
        ax.legend(
            all_scatter,
            [0, 1],
            scatterpoints=1,
            loc="center left",
            bbox_to_anchor=[1, 0.5],
        )
        # Shrink the axes to leave room for the legend on the right.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    elif len(X) == 2:
        # NOTE(review): this branch uses UNION where the one predictor branch
        # uses UNION ALL, so duplicated rows are dropped here - confirm that
        # the difference is intended.
        query = "(SELECT {}, {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} = 0 LIMIT {})".format(
            X[0], X[1], y, input_relation, X[0], X[1], y, int(max_nb_points / 2)
        )
        query += " UNION (SELECT {}, {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} = 1 LIMIT {})".format(
            X[0], X[1], y, input_relation, X[0], X[1], y, int(max_nb_points / 2)
        )
        all_points = executeSQL(query, method="fetchall", print_time_sql=False)
        x0, x1, y0, y1 = [], [], [], []
        for item in all_points:
            if item[2] == 0:
                x0.append(float(item[0]))
                y0.append(float(item[1]))
            else:
                x1.append(float(item[0]))
                y1.append(float(item[1]))
        min_logit_x, max_logit_x = min(x0 + x1), max(x0 + x1)
        step_x = (max_logit_x - min_logit_x) / 40.0
        min_logit_y, max_logit_y = min(y0 + y1), max(y0 + y1)
        step_y = (max_logit_y - min_logit_y) / 40.0
        X_logit = (
            arange(min_logit_x - 5 * step_x, max_logit_x + 5 * step_x, step_x)
            if (step_x > 0)
            else [max_logit_x]
        )
        Y_logit = (
            arange(min_logit_y - 5 * step_y, max_logit_y + 5 * step_y, step_y)
            if (step_y > 0)
            else [max_logit_y]
        )
        X_logit, Y_logit = np.meshgrid(X_logit, Y_logit)
        Z_logit = 1 / (
            1
            + np.exp(
                -(
                    coefficients[0]
                    + coefficients[1] * X_logit
                    + coefficients[2] * Y_logit
                )
            )
        )
        if not (ax):
            if isnotebook():
                plt.figure(figsize=(8, 6))
            ax = plt.axes(projection="3d")
        ax.plot_surface(
            X_logit, Y_logit, Z_logit, rstride=1, cstride=1, alpha=0.5, color="gray"
        )
        all_scatter = [
            ax.scatter(
                x0,
                y0,
                [
                    logit(
                        coefficients[0]
                        + coefficients[1] * x_val
                        + coefficients[2] * y_val
                    )
                    for x_val, y_val in zip(x0, y0)
                ],
                **updated_dict(param1, style_kwds, 1),
            )
        ]
        all_scatter += [
            ax.scatter(
                x1,
                y1,
                [
                    logit(
                        coefficients[0]
                        + coefficients[1] * x_val
                        + coefficients[2] * y_val
                    )
                    for x_val, y_val in zip(x1, y1)
                ],
                **updated_dict(param0, style_kwds, 0),
            )
        ]
        ax.set_xlabel(X[0])
        ax.set_ylabel(X[1])
        ax.set_zlabel(y)
        ax.legend(
            all_scatter,
            [0, 1],
            scatterpoints=1,
            loc="center left",
            bbox_to_anchor=[1.1, 0.5],
            title=y,
            ncol=2,
            fontsize=8,
        )
        # Shrink the axes to leave room for the legend on the right.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    else:
        raise ParameterError("The number of predictors is too big.")
    return ax
def plot_bubble_ml(
    x: list,
    y: list,
    s: list = None,
    z: list = [],
    x_label: str = "time",
    y_label: str = "score",
    title: str = "Model Type",
    reverse: tuple = (True, True),
    plt_text=True,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws a bubble plot, optionally coloured by category.

    Parameters
    ----------
    x: list
        Horizontal coordinate of each bubble.
    y: list
        Vertical coordinate of each bubble.
    s: list, optional
        Value driving the size of each bubble: min(250 + 5000 * value, 1200),
        or 1000 when the value is 0. Without it a fixed size is used.
    z: list, optional
        Category of each bubble. When given, the bubbles are sorted by the
        string form of their category, each category gets its own colour
        and a legend is drawn.
    x_label: str, optional
        Label of the X axis.
    y_label: str, optional
        Label of the Y axis.
    title: str, optional
        Title of the legend.
    reverse: tuple, optional
        Two booleans; a True entry draws the X (first) or the Y (second)
        axis reversed.
    plt_text: bool, optional
        If set to True, the axes cross at the centre of the plot and the
        four quadrant boxes ("Modest", "Efficient", "Performant",
        "Performant & Efficient") are drawn.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib scatter function.

    Returns
    -------
    ax
        Matplotlib axes object
    """
    if s:
        s = [min(250 + 5000 * elem, 1200) if elem != 0 else 1000 for elem in s]
    if z and s:
        data = [(x[i], y[i], s[i], z[i]) for i in range(len(x))]
        data.sort(key=lambda tup: str(tup[3]))
        x = [elem[0] for elem in data]
        y = [elem[1] for elem in data]
        s = [elem[2] for elem in data]
        z = [elem[3] for elem in data]
    elif z:
        data = [(x[i], y[i], z[i]) for i in range(len(x))]
        data.sort(key=lambda tup: str(tup[2]))
        x = [elem[0] for elem in data]
        y = [elem[1] for elem in data]
        z = [elem[2] for elem in data]
    colors = gen_colors()
    if not (ax):
        fig, ax = plt.subplots()
        if isnotebook():
            fig.set_size_inches(8, 6)
        ax.grid(axis="y")
        ax.set_axisbelow(True)
    if z:
        # One scatter per run of equal consecutive categories. The loop is
        # driven by the start of each run, so a single point is plotted too.
        all_categories = []
        tmp_colors = []
        start, idx = 0, 0
        while start < len(z):
            stop = start + 1
            while stop < len(z) and z[stop] == z[start]:
                stop += 1
            param = {
                "alpha": 0.8,
                "marker": "o",
                # Cycle through the palette when there are more categories
                # than colours instead of failing with an IndexError.
                "color": colors[idx % len(colors)],
                "edgecolors": "black",
            }
            size = s[start:stop] if s else 50
            style = updated_dict(param, style_kwds, idx)
            ax.scatter(x[start:stop], y[start:stop], s=size, **style)
            tmp_colors.append(style["color"])
            all_categories.append(z[start])
            start, idx = stop, idx + 1
        ax.legend(
            [
                Line2D(
                    [0],
                    [0],
                    marker="o",
                    color="black",
                    markerfacecolor=color,
                    markersize=8,
                )
                for color in tmp_colors
            ],
            all_categories,
            bbox_to_anchor=[1, 0.5],
            loc="center left",
            title=title,
            labelspacing=1,
        )
        # Shrink the axes to leave room for the legend on the right.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    else:
        param = {
            "alpha": 0.8,
            "marker": "o",
            "color": colors[0],
            "edgecolors": "black",
        }
        size = s if s else 300
        ax.scatter(x, y, s=size, **updated_dict(param, style_kwds, 0))
    # A larger first limit than second limit draws the axis reversed.
    if reverse[0]:
        ax.set_xlim(
            max(x) + 0.1 * (1 + max(x) - min(x)),
            min(x) - 0.1 - 0.1 * (1 + max(x) - min(x)),
        )
    if reverse[1]:
        ax.set_ylim(
            max(y) + 0.1 * (1 + max(y) - min(y)),
            min(y) - 0.1 * (1 + max(y) - min(y)),
        )
    if plt_text:
        ax.set_xlabel(x_label, loc="right")
        ax.set_ylabel(y_label, loc="top")
        ax.spines["left"].set_position("center")
        ax.spines["bottom"].set_position("center")
        ax.spines["right"].set_color("none")
        ax.spines["top"].set_color("none")
        delta_x = (max(x) - min(x)) * 0.1
        delta_y = (max(y) - min(y)) * 0.1
        x_high, x_low = max(x) + delta_x, min(x) - delta_x
        y_high, y_low = max(y) + delta_y, min(y) - delta_y
        # The "first" side is the high one on a reversed axis.
        x_first, x_second = (x_high, x_low) if reverse[0] else (x_low, x_high)
        y_first, y_second = (y_high, y_low) if reverse[1] else (y_low, y_high)
        quadrants = [
            (x_first, y_first, "Modest", 130.0),
            (x_first, y_second, "Efficient", 30.0),
            (x_second, y_first, "Performant", -130.0),
            (x_second, y_second, "Performant & Efficient", -30.0),
        ]
        for k, (x_pos, y_pos, label, rotation) in enumerate(quadrants):
            # Written on 'ax' and not through pyplot, so the boxes land on
            # the axes that was passed in even if it is not the current one.
            ax.text(
                x_pos,
                y_pos,
                label,
                size=15,
                rotation=rotation,
                ha="center",
                va="center",
                bbox=dict(boxstyle="round", ec=colors[k], fc=colors[k], alpha=0.3),
            )
    else:
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
    return ax
def _lof_radius(lof: list, scale: float) -> list:
    """Rescales the LOF scores linearly to marker sizes in [0, scale]."""
    if not lof:
        return []
    lowest, highest = min(lof), max(lof)
    spread = highest - lowest
    if spread == 0:
        # All the scores are equal: there is nothing to normalize by, so no
        # point gets an outlier ring (instead of dividing by zero).
        return [0] * len(lof)
    return [scale * (item - lowest) / spread for item in lof]


def lof_plot(
    input_relation: str,
    columns: list,
    lof: str,
    tablesample: float = -1,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws the Local Outlier Factor plot of 1, 2 or 3 columns: the data points
    plus one hollow marker per point whose size grows with its LOF score.

    Parameters
    ----------
    input_relation: str
        Relation to query.
    columns: list
        Names of the 1, 2 or 3 columns to plot. With 1 or 2 columns the names
        are passed through quote_ident before being used in the query.
    lof: str
        Name of the column holding the LOF score.
    tablesample: float, optional
        Sample percentage. A TABLESAMPLE clause is added to the query only
        when the value is strictly between 0 and 100.
    ax: Matplotlib axes object, optional
        The axes to plot on. A new figure is created when it is not set.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions. 'color'
        (or 'colors') may be a string or a sequence: the first color is used
        for the data points and the second one for the outlier rings.

    Returns
    -------
    ax
        Matplotlib axes object

    Raises
    ------
    Exception
        If 'columns' does not contain 1, 2 or 3 elements.
    """
    check_types([
        ("input_relation", input_relation, [str]),
        ("columns", columns, [list]),
        ("lof", lof, [str]),
        ("tablesample", tablesample, [int, float]),
    ])
    tablesample = (
        "TABLESAMPLE({})".format(tablesample)
        if (tablesample > 0 and tablesample < 100)
        else ""
    )
    colors = []
    if "color" in style_kwds:
        user_colors = style_kwds.pop("color")
        # Copy the sequence: the default palette is appended below and must
        # not leak into the object owned by the caller.
        colors = [user_colors] if isinstance(user_colors, str) else list(user_colors)
    elif "colors" in style_kwds:
        user_colors = style_kwds.pop("colors")
        colors = [user_colors] if isinstance(user_colors, str) else list(user_colors)
    # The default palette is used as a fallback when fewer than two colors
    # were supplied.
    colors = colors + gen_colors()
    param = {
        "s": 50,
        "edgecolors": "black",
        "color": colors[0],
    }
    if len(columns) == 1:
        column = quote_ident(columns[0])
        query = "SELECT {}, {} FROM {} {} WHERE {} IS NOT NULL".format(
            column, lof, input_relation, tablesample, column
        )
        query_result = executeSQL(query, method="fetchall", print_time_sql=False)
        column1 = [item[0] for item in query_result]
        scores = [item[1] for item in query_result]
        # One-dimensional data is drawn on the horizontal line y = 0.
        column2 = [0] * len(column1)
        if not (ax):
            fig, ax = plt.subplots()
            if isnotebook():
                fig.set_size_inches(8, 2)
            ax.set_axisbelow(True)
            ax.grid()
        ax.set_xlabel(column)
        radius = _lof_radius(scores, 2 * 1000)
        ax.scatter(
            column1,
            column2,
            label="Data points",
            **updated_dict(param, style_kwds, 0),
        )
        ax.scatter(
            column1,
            column2,
            s=radius,
            label="Outlier scores",
            facecolors="none",
            color=colors[1],
        )
    elif len(columns) == 2:
        columns = [quote_ident(column) for column in columns]
        query = "SELECT {}, {}, {} FROM {} {} WHERE {} IS NOT NULL AND {} IS NOT NULL".format(
            columns[0],
            columns[1],
            lof,
            input_relation,
            tablesample,
            columns[0],
            columns[1],
        )
        query_result = executeSQL(query, method="fetchall", print_time_sql=False)
        column1 = [item[0] for item in query_result]
        column2 = [item[1] for item in query_result]
        scores = [item[2] for item in query_result]
        if not (ax):
            fig, ax = plt.subplots()
            if isnotebook():
                fig.set_size_inches(8, 6)
            ax.set_axisbelow(True)
            ax.grid()
        ax.set_ylabel(columns[1])
        ax.set_xlabel(columns[0])
        radius = _lof_radius(scores, 1000)
        ax.scatter(
            column1,
            column2,
            label="Data points",
            **updated_dict(param, style_kwds, 0),
        )
        ax.scatter(
            column1,
            column2,
            s=radius,
            label="Outlier scores",
            facecolors="none",
            color=colors[1],
        )
    elif len(columns) == 3:
        query = "SELECT {}, {}, {}, {} FROM {} {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} IS NOT NULL".format(
            columns[0],
            columns[1],
            columns[2],
            lof,
            input_relation,
            tablesample,
            columns[0],
            columns[1],
            columns[2],
        )
        query_result = executeSQL(query, method="fetchall", print_time_sql=False)
        column1 = [float(item[0]) for item in query_result]
        column2 = [float(item[1]) for item in query_result]
        column3 = [float(item[2]) for item in query_result]
        scores = [float(item[3]) for item in query_result]
        if not (ax):
            if isnotebook():
                plt.figure(figsize=(8, 6))
            ax = plt.axes(projection="3d")
        ax.set_xlabel(columns[0])
        ax.set_ylabel(columns[1])
        ax.set_zlabel(columns[2])
        radius = _lof_radius(scores, 1000)
        ax.scatter(
            column1,
            column2,
            column3,
            label="Data points",
            **updated_dict(param, style_kwds, 0),
        )
        ax.scatter(
            column1,
            column2,
            column3,
            s=radius,
            facecolors="none",
            color=colors[1],
        )
        # Opaque white panes behind the 3D points.
        ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
        ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
        ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    else:
        raise Exception(
            "LocalOutlierFactor Plot is available for a maximum of 3 columns"
        )
    return ax
def voronoi_plot(
    clusters: list,
    columns: list,
    input_relation: str,
    max_nb_points: int = 1000,
    plot_crosses: bool = True,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws the 2D Voronoi diagram of the cluster centers, optionally with a
    random sample of the points of the input relation.

    Parameters
    ----------
    clusters: list
        List of the cluster centers. The first two values of each center
        are used as its X and Y coordinates.
    columns: list
        Names of the two columns, used for the axis labels and to query the
        sample of points.
    input_relation: str
        Relation to query for the sample of points.
    max_nb_points: int, optional
        Maximum number of points to draw. No query is executed when it is
        not strictly positive.
    plot_crosses: bool, optional
        If True, a white cross is drawn on each cluster center. It is only
        considered when 'max_nb_points' is strictly positive.
    ax: Matplotlib axes object, optional
        The axes to plot on. When it is not set, scipy's voronoi_plot_2d
        creates the figure.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions. 'color'
        may be a string or a list of colors used to fill the regions.

    Returns
    -------
    ax
        The Matplotlib axes object that was supplied. When no axes is
        supplied, the matplotlib.pyplot module is returned (kept for
        backward compatibility).
    """
    check_types([
        ("clusters", clusters, [list]),
        ("columns", columns, [list]),
        ("input_relation", input_relation, [str]),
        ("max_nb_points", max_nb_points, [int]),
    ])
    from scipy.spatial import voronoi_plot_2d, Voronoi

    centers_x = [elem[0] for elem in clusters]
    centers_y = [elem[1] for elem in clusters]
    min_x, max_x = min(centers_x), max(centers_x)
    min_y, max_y = min(centers_y), max(centers_y)
    # Four far-away points are added around the centers, presumably so that
    # the regions of the real centers are bounded and can be filled.
    dummies_point = [
        [min_x - 999, min_y - 999],
        [min_x - 999, max_y + 999],
        [max_x + 999, min_y - 999],
        [max_x + 999, max_y + 999],
    ]
    v = Voronoi(clusters + dummies_point)
    param = {"show_vertices": False}
    voronoi_plot_2d(v, ax=ax, **updated_dict(param, style_kwds))
    created_axes = not (ax)
    if created_axes:
        # voronoi_plot_2d drew on a figure of its own: fetch its axes so
        # that the same Axes API can be used in both cases.
        ax = plt.gca()
    ax.set_xlabel(columns[0])
    ax.set_ylabel(columns[1])
    colors = gen_colors()
    for idx, region in enumerate(v.regions):
        # Empty regions and regions having a vertex at infinity (-1) cannot
        # be filled.
        if region and -1 not in region:
            polygon = [v.vertices[i] for i in region]
            if "color" in style_kwds:
                if isinstance(style_kwds["color"], str):
                    color = style_kwds["color"]
                else:
                    color = style_kwds["color"][idx % len(style_kwds["color"])]
            else:
                color = colors[idx % len(colors)]
            ax.fill(*zip(*polygon), alpha=0.4, color=color)
    ax.plot(centers_x, centers_y, "ko")
    ax.set_xlim(min_x - 0.05 * (max_x - min_x), max_x + 0.05 * (max_x - min_x))
    ax.set_ylim(min_y - 0.05 * (max_y - min_y), max_y + 0.05 * (max_y - min_y))
    if max_nb_points > 0:
        query = "SELECT {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL ORDER BY RANDOM() LIMIT {}".format(
            columns[0],
            columns[1],
            input_relation,
            columns[0],
            columns[1],
            int(max_nb_points),
        )
        all_points = executeSQL(query, method="fetchall", print_time_sql=False)
        x = [float(item[0]) for item in all_points]
        y = [float(item[1]) for item in all_points]
        ax.scatter(
            x, y, color="black", s=10, alpha=1, zorder=3,
        )
        if plot_crosses:
            ax.scatter(
                centers_x,
                centers_y,
                color="white",
                s=200,
                linewidths=5,
                alpha=1,
                zorder=4,
                marker="x",
            )
    return plt if created_axes else ax
def svm_classifier_plot(
    X: list,
    y: str,
    input_relation: str,
    coefficients: list,
    max_nb_points: int = 500,
    ax=None,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws the points of the two classes (0 and 1) and the linear separator
    c0 + c1 * X[0] + ... + ck * X[k - 1] = 0 for 1, 2 or 3 predictors.

    Parameters
    ----------
    X: list
        Names of the 1, 2 or 3 predictors.
    y: str
        Name of the response column. Rows where it is equal to 0 form the
        first class, the other rows returned by the query form the second.
    input_relation: str
        Relation to query.
    coefficients: list
        Coefficients of the separator: coefficients[0] is the constant term
        and coefficients[i] is the weight of X[i - 1].
    max_nb_points: int, optional
        Maximum number of points to draw; at most half of it is queried for
        each class.
    ax: Matplotlib axes object, optional
        The axes to plot on. A new figure is created when it is not set.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions.

    Returns
    -------
    ax
        Matplotlib axes object

    Raises
    ------
    ParameterError
        If 'X' does not contain 1, 2 or 3 predictors.
    """
    check_types([
        ("X", X, [list]),
        ("y", y, [str]),
        ("input_relation", input_relation, [str]),
        ("coefficients", coefficients, [list]),
        ("max_nb_points", max_nb_points, [int, float]),
    ])
    param0 = {
        "marker": "o",
        "color": gen_colors()[0],
        "s": 50,
        "edgecolors": "black",
    }
    param1 = {
        "marker": "o",
        "color": gen_colors()[1],
        "s": 50,
        "edgecolors": "black",
    }
    if len(X) == 1:
        query = "(SELECT {}, {} FROM {} WHERE {} IS NOT NULL AND {} = 0 LIMIT {})".format(
            X[0], y, input_relation, X[0], y, int(max_nb_points / 2)
        )
        query += " UNION ALL (SELECT {}, {} FROM {} WHERE {} IS NOT NULL AND {} = 1 LIMIT {})".format(
            X[0], y, input_relation, X[0], y, int(max_nb_points / 2)
        )
        all_points = executeSQL(query, method="fetchall", print_time_sql=False)
        if not (ax):
            fig, ax = plt.subplots()
            if isnotebook():
                fig.set_size_inches(8, 6)
            ax.set_axisbelow(True)
            ax.grid()
        x0, x1 = [], []
        for item in all_points:
            if item[1] == 0:
                x0 += [float(item[0])]
            else:
                x1 += [float(item[0])]
        # With one predictor the separator is the vertical line
        # x = -c0 / c1, drawn between y = -1 and y = 1.
        x_svm, y_svm = (
            [
                -coefficients[0] / coefficients[1],
                -coefficients[0] / coefficients[1],
            ],
            [-1, 1],
        )
        ax.plot(x_svm, y_svm, alpha=1, color="black")
        all_scatter = [
            ax.scatter(x0, [0 for item in x0], **updated_dict(param1, style_kwds, 1))
        ]
        all_scatter += [
            ax.scatter(x1, [0 for item in x1], **updated_dict(param0, style_kwds, 0))
        ]
        ax.set_xlabel(X[0])
        ax.legend(
            all_scatter,
            [0, 1],
            scatterpoints=1,
            loc="center left",
            bbox_to_anchor=[1, 0.5],
        )
        # Shrink the axes to leave room for the legend on the right.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    elif len(X) == 2:
        query = "(SELECT {}, {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} = 0 LIMIT {})".format(
            X[0], X[1], y, input_relation, X[0], X[1], y, int(max_nb_points / 2)
        )
        query += " UNION (SELECT {}, {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} = 1 LIMIT {})".format(
            X[0], X[1], y, input_relation, X[0], X[1], y, int(max_nb_points / 2)
        )
        all_points = executeSQL(query, method="fetchall", print_time_sql=False)
        if not (ax):
            fig, ax = plt.subplots()
            if isnotebook():
                fig.set_size_inches(8, 6)
            ax.set_axisbelow(True)
            ax.grid()
        x0, x1, y0, y1 = [], [], [], []
        for item in all_points:
            if item[2] == 0:
                x0 += [float(item[0])]
                y0 += [float(item[1])]
            else:
                x1 += [float(item[0])]
                y1 += [float(item[1])]
        min_svm, max_svm = min(x0 + x1), max(x0 + x1)
        # With two predictors the separator is the line
        # y = -(c0 + c1 * x) / c2, drawn over the range of the points.
        x_svm, y_svm = (
            [min_svm, max_svm],
            [
                -(coefficients[0] + coefficients[1] * min_svm) / coefficients[2],
                -(coefficients[0] + coefficients[1] * max_svm) / coefficients[2],
            ],
        )
        ax.plot(x_svm, y_svm, alpha=1, color="black")
        all_scatter = [ax.scatter(x0, y0, **updated_dict(param1, style_kwds, 1))]
        all_scatter += [ax.scatter(x1, y1, **updated_dict(param0, style_kwds, 0))]
        ax.set_xlabel(X[0])
        ax.set_ylabel(X[1])
        ax.legend(
            all_scatter,
            [0, 1],
            scatterpoints=1,
            loc="center left",
            bbox_to_anchor=[1, 0.5],
        )
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    elif len(X) == 3:
        query = "(SELECT {}, {}, {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} IS NOT NULL AND {} = 0 LIMIT {})".format(
            X[0],
            X[1],
            X[2],
            y,
            input_relation,
            X[0],
            X[1],
            X[2],
            y,
            int(max_nb_points / 2),
        )
        query += " UNION (SELECT {}, {}, {}, {} FROM {} WHERE {} IS NOT NULL AND {} IS NOT NULL AND {} IS NOT NULL AND {} = 1 LIMIT {})".format(
            X[0],
            X[1],
            X[2],
            y,
            input_relation,
            X[0],
            X[1],
            X[2],
            y,
            int(max_nb_points / 2),
        )
        all_points = executeSQL(query, method="fetchall", print_time_sql=False)
        x0, x1, y0, y1, z0, z1 = [], [], [], [], [], []
        for item in all_points:
            if item[3] == 0:
                x0 += [float(item[0])]
                y0 += [float(item[1])]
                z0 += [float(item[2])]
            else:
                x1 += [float(item[0])]
                y1 += [float(item[1])]
                z1 += [float(item[2])]
        min_svm_x, max_svm_x = min(x0 + x1), max(x0 + x1)
        step_x = (max_svm_x - min_svm_x) / 40.0
        min_svm_y, max_svm_y = min(y0 + y1), max(y0 + y1)
        step_y = (max_svm_y - min_svm_y) / 40.0
        # Grid covering the points plus a margin of 5 steps on each side; a
        # constant predictor (step of 0) collapses to a single grid value.
        X_svm = (
            arange(min_svm_x - 5 * step_x, max_svm_x + 5 * step_x, step_x)
            if (step_x > 0)
            else [max_svm_x]
        )
        Y_svm = (
            arange(min_svm_y - 5 * step_y, max_svm_y + 5 * step_y, step_y)
            if (step_y > 0)
            else [max_svm_y]
        )
        X_svm, Y_svm = np.meshgrid(X_svm, Y_svm)
        # With three predictors the separator is the plane
        # c0 + c1 * x + c2 * y + c3 * z = 0, solved for z exactly like the
        # 1D and 2D cases solve for their last predictor.
        Z_svm = (
            -(coefficients[0] + coefficients[1] * X_svm + coefficients[2] * Y_svm)
            / coefficients[3]
        )
        if not (ax):
            if isnotebook():
                plt.figure(figsize=(8, 6))
            ax = plt.axes(projection="3d")
        ax.plot_surface(
            X_svm, Y_svm, Z_svm, rstride=1, cstride=1, alpha=0.5, color="gray"
        )
        param0["alpha"] = 0.8
        all_scatter = [ax.scatter(x0, y0, z0, **updated_dict(param1, style_kwds, 1))]
        all_scatter += [ax.scatter(x1, y1, z1, **updated_dict(param0, style_kwds, 0))]
        ax.set_xlabel(X[0])
        ax.set_ylabel(X[1])
        ax.set_zlabel(X[2])
        ax.legend(
            all_scatter,
            [0, 1],
            scatterpoints=1,
            title=y,
            loc="center left",
            bbox_to_anchor=[1.1, 0.5],
            ncol=1,
            fontsize=8,
        )
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    else:
        raise ParameterError("The number of predictors is too big.")
    return ax
def plot_var(self, dimensions: tuple = (1, 2), method: str = "auto", ax=None, **style_kwds):
    """
    ---------------------------------------------------------------------------
    Draws the MCA (multiple correspondence analysis) graph.

    Parameters
    ----------
    dimensions: tuple, optional
        Pair of component IDs: the first one is drawn on the X axis and the
        second one on the Y axis.
    method: str, optional
        Method used to draw the plot.
            auto   : Only the variables are displayed.
            cos2   : The sum of the cos2 of the two components is used as
                     CMAP.
            contrib: The average share (in percent) of each variable in the
                     total cos2 of the two components is used as CMAP.
    ax: Matplotlib axes object, optional
        The axes to plot on.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions.

    Returns
    -------
    ax
        Matplotlib axes object
    """
    check_types([
        ("dimensions", dimensions, [tuple]),
        ("method", method, ["auto", "cos2", "contrib"]),
    ])
    first_pc = "PC{}".format(dimensions[0])
    second_pc = "PC{}".format(dimensions[1])
    x = self.components_[first_pc]
    y = self.components_[second_pc]
    n = len(self.cos2_[first_pc])
    if method == "cos2" or method == "contrib":
        cos2_first = self.cos2_[first_pc]
        cos2_second = self.cos2_[second_pc]
        if method == "cos2":
            c = [cos2_first[i] + cos2_second[i] for i in range(n)]
        else:
            sum_1 = sum(cos2_first)
            sum_2 = sum(cos2_second)
            c = []
            for i in range(n):
                share = cos2_first[i] / sum_1 + cos2_second[i] / sum_2
                c.append(0.5 * 100 * share)
        style_kwds["c"] = c
        if "cmap" not in style_kwds:
            from verticapy.plot import gen_cmap, gen_colors

            # Default color map built from the first three default colors.
            palette = gen_colors()
            style_kwds["cmap"] = gen_cmap(
                color=[palette[0], palette[1], palette[2]]
            )
    explained_variance = self.explained_variance_["explained_variance"]
    variance_pair = (
        explained_variance[dimensions[0] - 1],
        explained_variance[dimensions[1] - 1],
    )
    # 'plot_var' resolves here to the plotting function of the same name,
    # not to this method.
    return plot_var(
        x, y, self.X, variance_pair, dimensions, method, ax, **style_kwds
    )