Exemplo n.º 1
0
    def predict(self, selection: List[int], dataset) -> List[Prediction]:
        """Build one Prediction per column of the processed output.

        Args:
            selection: Per-row selection flags; converted to a NumPy array and
                cast to bool to pick the selected ids.
            dataset: Table that exposes an ``"id"`` column and supports
                ``.loc`` indexing with a boolean mask and a column list.

        Returns:
            A list with one Prediction for every (column, unique value) pair
            produced by zipping the output columns with ``unique_vals``.
        """
        output, unique_vals = self.processOutput()
        sels = np.array(selection)
        ids = dataset["id"]
        # The selected ids do not depend on the column being processed.
        selected_ids = ids[sels.astype(bool)].tolist()

        preds: List[Prediction] = []
        # DataFrame.iteritems() was removed in pandas 2.0; items() yields the
        # same (label, Series) pairs on every pandas version.
        for (_, vals), u in zip(output.items(), unique_vals):
            membership = vals.values
            # Computed once and reused for both memberIds and the stats.
            member_ids = self.getMemberIds(membership, ids)
            dimensions = self.getDimensionArr()
            preds.append(
                Prediction(
                    rank=rank_jaccard(membership, sels),
                    intent=self.intentType,
                    memberIds=member_ids,
                    dimensions=dimensions,
                    info=self.getClusterParams(
                        dataset.loc[membership.astype(bool),
                                    dimensions].values,
                        u,
                    ),
                    algorithm=self.algorithm,
                    membership=getStats(member_ids, selected_ids),
                    description=self.description,
                ))

        return preds
Exemplo n.º 2
0
def range_intent(dataset,
                 dimensions,
                 selection,
                 max_depth=None) -> List[Prediction]:
    """Fit a decision tree on the selection and turn it into predictions.

    A DecisionTreeClassifier is fitted on ``dataset[dimensions]`` with the
    selection as the target, and the rules returned by ``get_decision_paths``
    are stored in the prediction's ``info``. When ``max_depth`` is None and the
    fitted tree is deeper than 1, the function calls itself once more with the
    depth capped at ``depth - 1`` and appends those predictions.

    Args:
        dataset: Table with an ``id`` column plus the columns in ``dimensions``.
        dimensions: Column names used as features.
        selection: Per-row selection flags.
        max_depth: Depth limit passed to the classifier; None means unlimited.

    Returns:
        A list of Prediction objects, the first being the one built here.
    """
    selection = np.array(selection)
    picked_ids = dataset.loc[selection.astype(bool), "id"]
    features = dataset[dimensions]

    classifier = tree.DecisionTreeClassifier(max_depth=max_depth)
    classifier.fit(features, selection)
    rules = get_decision_paths(classifier, features, selection)

    # Without a usable mask every row of the dataset counts as a member.
    rule_mask = get_mask_from_rules(features, rules)
    member_ids = (dataset.loc[rule_mask, "id"].tolist()
                  if rule_mask is not None else dataset.id.tolist())

    depth = classifier.get_depth()
    unrestricted = max_depth is None
    algorithm = "DecisionTree"

    pred = Prediction(
        # Deeper trees get a lower rank.
        rank=1 / (depth ** 2 + 1),
        intent="Range" if unrestricted else "SimplifiedRange",
        memberIds=member_ids,
        dimensions=dimensions,
        info={
            "depth": depth,
            "rules": rules
        },
        algorithm=algorithm,
        membership=getStats(member_ids, picked_ids.tolist()),
        description=f"Range-{algorithm}",
    )

    if depth <= 1 or not unrestricted:
        return [pred]

    # The capped call passes a max_depth, so it cannot recurse any further.
    return [pred, *range_intent(dataset, dimensions, selection, depth - 1)]
Exemplo n.º 3
0
    def predict(self, selection: List[int], dataset) -> List[Prediction]:
        """Build one Prediction per column of the processed output.

        The column label, converted with ``str``, becomes the prediction's
        intent.

        Args:
            selection: Per-row selection flags; converted to a NumPy array and
                cast to bool to pick the selected ids.
            dataset: Table that exposes an ``"id"`` column.

        Returns:
            A list with one Prediction for each column of the output.
        """
        output = self.processOutput()
        sels = np.array(selection)
        ids = dataset["id"]
        # The selected ids do not depend on the column being processed.
        selected_ids = ids[sels.astype(bool)].tolist()

        preds: List[Prediction] = []
        # DataFrame.iteritems() was removed in pandas 2.0; items() yields the
        # same (label, Series) pairs on every pandas version.
        for col, vals in output.items():
            # Computed once and reused for both memberIds and the stats.
            member_ids = self.getMemberIds(vals.values, ids)
            preds.append(
                Prediction(
                    rank=rank_jaccard(vals.values, sels),
                    intent=str(col),
                    memberIds=member_ids,
                    dimensions=self.getDimensionArr(),
                    info=self.getInfo(),
                    algorithm=self.algorithm,
                    membership=getStats(member_ids, selected_ids),
                    description=self.description,
                ))

        return preds
Exemplo n.º 4
0
    def predict(self, selection: List[int], dataset) -> List[Prediction]:
        """Build the single Prediction for the processed output.

        Args:
            selection: Per-row selection flags; converted to a NumPy array and
                cast to bool to pick the selected ids.
            dataset: Table that exposes an ``"id"`` column; also forwarded to
                ``getSkylineInfo``.

        Returns:
            A one-element list holding the Prediction.
        """
        output = self.processOutput()
        sels = np.array(selection)
        ids = dataset["id"]
        # Previously recomputed three times; one lookup feeds memberIds, the
        # skyline info and the membership stats.
        member_ids = self.getMemberIds(output, ids)

        preds: List[Prediction] = [
            Prediction(
                rank=rank_jaccard(output, sels),
                intent=self.intentType,
                memberIds=member_ids,
                dimensions=self.getDimensionArr(),
                info=self.getSkylineInfo(dataset, member_ids),
                algorithm=self.algorithm,
                membership=getStats(
                    member_ids,
                    ids[sels.astype(bool)].tolist(),
                ),
                description=self.description,
            )
        ]

        return preds
Exemplo n.º 5
0
 def __init__(self, type, prediction, **kwargs):
     """Initialise the parent with ``type`` and wrap ``prediction``.

     ``prediction`` is unpacked as keyword arguments into ``Prediction``, so it
     must be a mapping. Any extra keyword arguments are accepted but not used
     here.
     """
     super().__init__(type)
     self.prediction: Prediction = Prediction(**prediction)
Exemplo n.º 6
0
def applyPrediction(
    prediction: Prediction,
    selections: List[str],
    target: pd.DataFrame,
    target_id: str,
) -> Prediction:
    """Re-apply an existing prediction to another dataset.

    The algorithm and intent recorded on ``prediction`` decide which helper is
    run against ``target``; the resulting member ids are ranked against
    ``selections`` and packaged as a new Prediction.

    Args:
        prediction: The prediction to transfer.
        selections: Ids currently selected in ``target``.
        target: Dataset to apply the prediction to; must have an ``id`` column.
        target_id: Identifier of ``target``, compared with
            ``prediction.original_id``.

    Returns:
        ``prediction`` itself when it already belongs to ``target_id``, the
        result of ``apply_linear_regression`` for the LR algorithm, and
        otherwise a newly built Prediction.

    Raises:
        ValueError: If ``prediction.algorithm`` or ``prediction.intent`` is not
            a valid value (assuming ``Algorithms`` and ``Intents`` are Enums
            -- TODO confirm).
    """
    # A prediction that already belongs to this dataset is returned untouched.
    if prediction.original_id is not None and prediction.original_id == target_id:
        return prediction

    algorithm = Algorithms(prediction.algorithm)
    intent = Intents(prediction.intent)
    dimensions = prediction.dimensions
    info = prediction.info
    sels = target.id.isin(selections)

    # Defaults used when no branch below produces ids.
    ids = np.array([])
    # Copy so the original prediction's info is never mutated.
    new_info = deepcopy(info)

    if algorithm == Algorithms.KMEANS:
        ids, centers, hull, closest_center = applyKMeans(
            target,
            dimensions,
            info["params"]["n_clusters"],
            info["selected_center"],
            np.array(info["centers"]),
        )
        new_info["centers"] = centers.tolist()
        new_info["hull"] = hull
        new_info["selected_center"] = closest_center

    elif algorithm == Algorithms.DBSCAN:
        eps = info["params"]["eps"]
        min_samples = info["params"]["min_samples"]
        if intent == Intents.CLUSTER:
            ids, hull = applyDBScanCluster(target, dimensions, eps,
                                           min_samples, prediction.memberIds)
            new_info["hull"] = hull
        elif intent in (Intents.OUTLIER, Intents.NONOUTLIER):
            ids = applyDBScanOutlier(
                target,
                dimensions,
                eps,
                min_samples,
                # True for OUTLIER, False for NONOUTLIER.
                intent != Intents.NONOUTLIER,
            )
    elif algorithm == Algorithms.DECISIONTREE:
        ids = apply_range(target, info["rules"])
    elif algorithm == Algorithms.BNL:
        # applySkyline returns its own info, replacing the copied one.
        ids, new_info = applySkyline(target, dimensions, info["sense"])
        ids = ids.astype(bool)
        new_info["frontier"] = target[ids][dimensions].values.tolist()
        ids = target[ids].id
    elif algorithm == Algorithms.LR:
        return apply_linear_regression(target, prediction, sels)

    intents = target.id.isin(ids)

    # Normalise once to a plain list; isinstance also accepts list subclasses,
    # which have no .tolist() and used to fail the exact-type check.
    member_ids = ids if isinstance(ids, list) else ids.tolist()

    return Prediction(
        rank=rank_jaccard(intents, sels),
        intent=intent.value,
        memberIds=member_ids,
        dimensions=dimensions,
        info=new_info,
        algorithm=algorithm.value,
        # Pass a list, as the other getStats callers do.
        membership=getStats(member_ids, selections),
        description=prediction.description,
    )