Exemplo n.º 1
0
def run(file, meta, features, response, family, outpath, predict):
    """
    Fit a model with the ``GBM`` class and optionally predict new data.

    NOTE(review): this docstring used to read "generalized linear
    regression model" although the code instantiates ``GBM`` -- confirm
    which model is intended here.

    :param file: input data set, handed to ``read_and_transmute``
    :param meta: column meta information, handed to ``read_column_info``
    :param features: feature information, handed to ``read_column_info``
    :param response: response column, handed to the reader and to ``GBM``
    :param family: family argument forwarded to the ``GBM`` constructor
    :param outpath: output location; it is passed through
        ``drop_suffix(outpath, "/")`` and then used for the log file, the
        fitted model and the predictions
    :param predict: optional path of a data set to predict on; prediction
        is skipped when it is unset, empty or does not exist

    Any exception raised while fitting, writing or predicting is logged
    through ``logger`` and is not re-raised.
    """

    import pathlib
    from pybda.util.string import drop_suffix
    from pybda.logger import set_logger
    from pybda.spark_session import SparkSession
    from pybda.io.as_filename import as_logfile
    from pybda.io.io import read_and_transmute, read_column_info

    outpath = drop_suffix(outpath, "/")
    set_logger(as_logfile(outpath))

    with SparkSession() as spark:
        try:
            meta, features = read_column_info(meta, features)
            data = read_and_transmute(spark, file, features, response)
            fl = GBM(spark, response, features, family)
            fl = fl.fit(data)
            fl.write(outpath)
            # Skip prediction explicitly for an unset/empty `predict`:
            # Path(None) raises TypeError and Path("") is the current
            # directory, which always exists.
            if predict and pathlib.Path(predict).exists():
                pre_data = read_and_transmute(spark,
                                              predict,
                                              features,
                                              drop=False)
                pre_data = fl.predict(pre_data)
                pre_data.write(outpath)

        except Exception as e:
            logger.error("Some error: %s", str(e))
Exemplo n.º 2
0
def run(factors, file, features, outpath):
    """
    Run a factor analysis on a data set and write the transformed result.

    :param factors: forwarded to the ``FactorAnalysis`` constructor
    :param file: input data set, handed to ``read_and_transmute``
    :param features: feature information, resolved with ``read_info``
    :param outpath: output location; it is passed through
        ``drop_suffix(outpath, "/")`` and then used for the log file and
        for writing the result of ``fit_transform``

    Any exception raised inside the Spark session is logged through
    ``logger`` and is not re-raised.
    """

    from pybda.io.as_filename import as_logfile
    from pybda.io.io import read_info, read_and_transmute
    from pybda.logger import set_logger
    from pybda.spark_session import SparkSession
    from pybda.util.string import drop_suffix

    outpath = drop_suffix(outpath, "/")
    logfile = as_logfile(outpath)
    set_logger(logfile)

    with SparkSession() as spark:
        try:
            features = read_info(features)
            # Features are read without assembling them for this model.
            dataset = read_and_transmute(
                spark, file, features, assemble_features=False)
            model = FactorAnalysis(spark, factors, features)
            transformed = model.fit_transform(dataset)
            transformed.write(outpath)
        except Exception as err:
            message = "Some error: {}".format(str(err))
            logger.error(message)
Exemplo n.º 3
0
def run(clusters, file, features, outpath):
    """
    Fit a kmeans-clustering to a data set.

    :param clusters: forwarded to the ``KMeans`` constructor
    :param file: input data set, handed to ``read_and_transmute``
    :param features: feature information, resolved with ``read_info``
    :param outpath: output location; it is passed through
        ``drop_suffix(outpath, "/")`` and the result is used for the log
        file and as the folder given to ``fit`` and ``write``

    Any exception raised inside the Spark session is logged through
    ``logger`` and is not re-raised.
    """

    from pybda.io.as_filename import as_logfile
    from pybda.logger import set_logger
    from pybda.spark_session import SparkSession
    from pybda.util.string import drop_suffix
    from pybda.io.io import read_info, read_and_transmute

    outfolder = drop_suffix(outpath, "/")
    # Derive the log file from the normalized folder so that a trailing
    # "/" in `outpath` does not end up in the log file name.
    set_logger(as_logfile(outfolder))

    with SparkSession() as spark:
        try:
            features = read_info(features)
            data = read_and_transmute(spark, file, features)
            fit = KMeans(spark, clusters, features)
            fit = fit.fit(data, outfolder)
            fit.write(data, outfolder)
        except Exception as e:
            logger.error("Some error: {}".format(e))