def run(factors, file, features, outpath):
    """
    Fit a factor analysis to a data set and write the transformed result.

    The output path has a trailing "/" suffix dropped and is then used both
    to derive the log file name and as the write destination.

    :param factors: forwarded to ``FactorAnalysis`` (presumably the number
        of factors -- confirm against the ``FactorAnalysis`` constructor)
    :param file: data source handed to ``read_and_transmute``
    :param features: handed to ``read_info`` to obtain the feature
        description used for reading and fitting
    :param outpath: destination of the transformed data; also the basis of
        the log file name

    Any exception raised while reading, fitting or writing is logged and
    NOT re-raised.
    """
    from pybda.io.as_filename import as_logfile
    from pybda.io.io import read_and_transmute, read_info
    from pybda.logger import set_logger
    from pybda.spark_session import SparkSession
    from pybda.util.string import drop_suffix

    out_dir = drop_suffix(outpath, "/")
    log_file = as_logfile(out_dir)
    set_logger(log_file)

    with SparkSession() as spark:
        try:
            feature_info = read_info(features)
            # Features are deliberately left un-assembled for this model.
            dataset = read_and_transmute(
                spark, file, feature_info, assemble_features=False
            )
            model = FactorAnalysis(spark, factors, feature_info)
            model.fit_transform(dataset).write(out_dir)
        except Exception as e:
            # Best-effort entry point: report the failure, do not propagate.
            logger.error("Some error: {}".format(str(e)))
def run(clusters, file, features, outpath):
    """
    Fit a kmeans-clustering to a data set and write the fit.

    The output path has a trailing "/" suffix dropped; the resulting folder
    is used to derive the log file name, is passed to the fit, and is the
    write destination.

    :param clusters: forwarded to ``KMeans`` (presumably the number(s) of
        clusters -- confirm against the ``KMeans`` constructor)
    :param file: data source handed to ``read_and_transmute``
    :param features: handed to ``read_info`` to obtain the feature
        description used for reading and fitting
    :param outpath: output folder for the fit; also the basis of the log
        file name

    Any exception raised while reading, fitting or writing is logged and
    NOT re-raised.
    """
    from pybda.io.as_filename import as_logfile
    from pybda.io.io import read_info, read_and_transmute
    from pybda.logger import set_logger
    from pybda.spark_session import SparkSession
    from pybda.util.string import drop_suffix

    outfolder = drop_suffix(outpath, "/")
    # Derive the log file from the suffix-stripped folder, not the raw
    # argument, so a trailing "/" in `outpath` cannot leak into the log
    # file name.
    set_logger(as_logfile(outfolder))

    with SparkSession() as spark:
        try:
            features = read_info(features)
            data = read_and_transmute(spark, file, features)
            model = KMeans(spark, clusters, features)
            fitted = model.fit(data, outfolder)
            fitted.write(data, outfolder)
        except Exception as e:
            # Best-effort entry point: report the failure, do not propagate.
            logger.error("Some error: {}".format(e))