Exemplo n.º 1
0
def main(args=None):
    """Time a clustering fit for a range of cluster counts.

    Parses the arguments, optionally connects to a scheduler, loads and
    persists the input array, then calls ``do`` once for every cluster
    count in ``range(args.start, args.stop, args.step)``.  One row per
    fit is collected and the table is written to ``timings.csv``.

    Parameters
    ----------
    args : optional
        Forwarded unchanged to ``parse_args``.  The parsed result is read
        for ``start``, ``stop``, ``step``, ``factor`` and
        ``scheduler_address``.

    Returns
    -------
    None
    """
    args = parse_args(args)
    steps = range(args.start, args.stop, args.step)
    if args.scheduler_address:
        client = Client(args.scheduler_address)
        info = client.scheduler_info()
        logger.info("Distributed mode: %s", client.scheduler)
        logger.info("Dashboard: %s:%s", info["address"],
                    info["services"]["bokeh"])
    else:
        logger.warning("Local mode")

    logger.info("Fitting for %s", list(steps))

    logger.info("Reading data")
    X = read().pipe(transform).pipe(as_array)
    # persist() hands back a one-element sequence here; unpack the array.
    (X,) = persist(X)

    timings = []

    # A range can be iterated repeatedly, so reuse the one logged above
    # instead of rebuilding it from the same three arguments.
    for n_clusters in steps:
        logger.info("Starting %02d", n_clusters)
        # t0/t1 bracket the fit so the elapsed value can be stored per row.
        t0 = tic()
        with _timer(n_clusters, _logger=logger):
            km = do(X, n_clusters, factor=args.factor)
        t1 = tic()
        logger.info("Cluster Centers [%s]:\n%s", n_clusters,
                    km.cluster_centers_)
        inertia = km.inertia_.compute()
        # Label the line with the cluster count, matching the
        # "Cluster Centers" line above (previously the whole centers
        # array was printed inside the brackets).
        logger.info("Inertia [%s]: %s", n_clusters, inertia)
        timings.append((n_clusters, args.factor, t1 - t0, inertia))

    pd.DataFrame(timings, columns=["n_clusters", "factor", "time",
                                   "inertia"]).to_csv("timings.csv")
Exemplo n.º 2
0
    def partial_fit(self, X, y=None, **kwargs):
        """Run ``partial_fit`` on the underlying estimator.

        Parameters
        ----------
        X, y : array-like
        **kwargs
            Extra keyword arguments forwarded to the underlying
            estimator's ``partial_fit``.

        Returns
        -------
        self : object
        """
        logger.info("Starting partial_fit")
        # NOTE(review): the timer label is "fit" even though this is
        # partial_fit -- confirm whether that is intended.
        with _timer("fit", _logger=logger):
            inner = self.estimator
            fitted = inner.partial_fit(X, y, **kwargs)

        # Hand the returned object to copy_learned_attributes, first for
        # this wrapper and then for the wrapped estimator.
        copy_learned_attributes(fitted, self)
        copy_learned_attributes(fitted, self.estimator)
        return self
Exemplo n.º 3
0
def fit(data, use_scikit_learn=False):
    """Build a KMeans estimator and fit it to ``data``.

    The estimator is created with 8 clusters and ``random_state=0``.
    When ``use_scikit_learn`` is true ``sk.KMeans`` is used; otherwise
    ``KMeans`` is used with an oversampling factor of 2.  The fit itself
    runs inside a ``_timer`` labelled ``"km.fit"``.

    Parameters
    ----------
    data
        Passed directly to the estimator's ``fit`` method.
    use_scikit_learn : bool, default False
        Selects ``sk.KMeans`` instead of ``KMeans``.

    Returns
    -------
    None
    """
    logger.info("Starting to cluster")
    n_clusters, oversampling_factor = 8, 2

    km = (
        sk.KMeans(n_clusters=n_clusters, random_state=0)
        if use_scikit_learn
        else KMeans(
            n_clusters=n_clusters,
            oversampling_factor=oversampling_factor,
            random_state=0,
        )
    )

    # The oversampling factor is logged for both estimator choices.
    logger.info(
        "Starting n_clusters=%2d, oversampling_factor=%2d",
        n_clusters,
        oversampling_factor,
    )
    with _timer("km.fit", _logger=logger):
        km.fit(data)
Exemplo n.º 4
0
    def fit(self, X, y=None, **kwargs):
        """Run ``fit`` on the underlying estimator.

        The call is wrapped in a ``_timer`` labelled ``"fit"``.  The object
        returned by the estimator is then passed to
        ``copy_learned_attributes`` for this object and for
        ``self.estimator``.

        Parameters
        ----------
        X, y : array-like
        **kwargs
            Extra keyword arguments forwarded to the underlying
            estimator's ``fit``.

        Returns
        -------
        self : object
        """
        logger.info("Starting fit")
        with _timer("fit", _logger=logger):
            inner = self.estimator
            fitted = inner.fit(X, y, **kwargs)

        # First this wrapper, then the wrapped estimator.
        copy_learned_attributes(fitted, self)
        copy_learned_attributes(fitted, self.estimator)
        return self