Example #1
    if not standalone:
        spark_context = SparkContext(appName="GMM-MLE-example")

    num_classes = 50
    #X_train,Y_train,X_test,Y_test = generate_datasets.generate_multivariate_normals( 5, 2, 150, 50, 5.0, 2.0 )
    X_train, Y_train, X_test, Y_test = machine_learning.generate_datasets.generate_multivariate_normals(
        num_classes, 7, 25000, 5000, 15.0, 12.0)

    #X_train,Y_train = load_samples( dataset_filename )

    #os.makedirs( base_dir+'/log',    exist_ok=True )
    #os.makedirs( base_dir+'/models', exist_ok=True )

    mle = machine_learning.MLE(covar_type=covar_type,
                               dim=X_train.shape[1],
                               log_dir=base_dir + '/log',
                               models_dir=base_dir + '/models',
                               batch_size=500)

    if spark_context is not None:
        samples = spark_context.parallelize(X_train, slices)
        samples.persist()
        mle.fit_with_spark(spark_context=spark_context,
                           samples=samples,
                           max_components=max_components)
        samples.unpersist()
        spark_context.stop()
    else:
        mle.fit_standalone(samples=X_train, max_components=max_components)
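A hypothetical follow-up to Example #1 (not in the original snippet): once fitting finishes, a model written to models_dir can be read back with GMM.load_from_text. This assumes the 'gmm-%04d.txt' file naming that appears later in these examples, and that a file for max_components components exists.

gmm = machine_learning.GMM()
gmm.load_from_text(filename=base_dir + '/models/gmm-%04d.txt' % max_components)
posteriors, logL = gmm.posteriors(X_test[0])  # per-component posteriors for one sample
print(posteriors.shape, logL)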
Example #2
    print(getCurrentDateTimeString() + " - we are working with " + str(samples.count()) + " blocks of approximately " + str(clust_batch_size) + " samples")
    # Shows the shape of an example element in the temporary RDD of blocks of samples
    print(getCurrentDateTimeString() + " - " + str(samples.first().shape))

# Gets the dimensionality of the samples, needed to create the MLE object.
dim_x = samples.first().shape[1]


# Models and Logs Directories Creation
createDirectoryIfNotExists(absoluteclusteringFullLogDir)
createDirectoryIfNotExists(absoluteClusteringFullModelsDirName)

# Delete data from previous executions
deleteDirectoryData(absoluteclusteringFullLogDir)
deleteDirectoryData(absoluteClusteringFullModelsDirName)

# Create MLE class
mle = machine_learning.MLE(covar_type='full',
                           dim=dim_x,
                           log_dir=absoluteclusteringFullLogDir,
                           models_dir=absoluteClusteringFullModelsDirName)

# Fit clusters
mle.fit_with_spark(spark_context=spark_context,
                   samples=samples,
                   max_components=clustFullMaxComponents)

samples.unpersist()
spark_context.stop()


# Get the current time to measure execution time
executionEndTime = time.time()
if verbose:
    print(getExecutionTimeMsg(executionStartTime, executionEndTime))
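Example #2 treats samples as an RDD whose elements are 2-D blocks of roughly clust_batch_size rows each. A minimal sketch (not from the original repo) of how such an RDD of blocks can be built from a NumPy training matrix:

def split_into_blocks(X, batch_size):
    # One 2-D array of up to batch_size rows per RDD element.
    return [X[i:i + batch_size] for i in range(0, len(X), batch_size)]

blocks = split_into_blocks(X_train, clust_batch_size)
samples = spark_context.parallelize(blocks, len(blocks))
samples.persist()
print(samples.count(), samples.first().shape)  # e.g. 50 blocks of shape (500, dim)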
Example #3
        # Shows an example element of the temporary RDD of blocks of samples, and its type
        print(samples.first())
        print(type(samples.first()))

        samples.persist()
        print("we are working with %d blocks of approximately %d samples " %
              (samples.count(), batch_size))

        # Shows the shape of an example element in the temporary RDD of blocks of samples
        print(samples.first().shape)
        # Gets the dimensionality of the samples, needed to create the MLE object.
        dim_x = samples.first().shape[1]

        mle = machine_learning.MLE(covar_type=covar_type,
                                   dim=dim_x,
                                   log_dir=base_dir + '/log',
                                   models_dir=base_dir + '/models')

        mle.fit_with_spark(spark_context=spark_context,
                           samples=samples,
                           max_components=max_components)

        samples.unpersist()
        spark_context.stop()
    else:
        X_train, Y_train = load_samples(dataset_filename)
        dim_x = 0
        if isinstance(X_train, list):
            dim_x = X_train[0].shape[1]
        elif isinstance(X_train, numpy.ndarray):
            dim_x = X_train.shape[1]
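The two branches above exist because load_samples may return either a list of 2-D sample blocks or a single 2-D matrix; both carry the same dimensionality (dummy shapes, for illustration only):

import numpy
X_as_list = [numpy.zeros((500, 7)), numpy.zeros((500, 7))]  # list of 2-D blocks
X_as_matrix = numpy.zeros((1000, 7))                        # one 2-D matrix
assert X_as_list[0].shape[1] == X_as_matrix.shape[1] == 7   # same dim_x either way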
Example #4
        # Models and Logs Directories Creation
        createDirectoryIfNotExists(auxLogDirName)
        createDirectoryIfNotExists(auxModelsDirName)

        # Delete data from previous executions
        deleteDirectoryData(auxLogDirName)
        deleteDirectoryData(auxModelsDirName)

        if verbose:
            print(getCurrentDateTimeString() +
                  " - Working with " + covarType + " covariance matrix type")

        # Create MLE class
        mle = machine_learning.MLE(covar_type=covarType,
                                   dim=dim_x,
                                   log_dir=auxLogDirName,
                                   models_dir=auxModelsDirName)

        try:
            # Fit clusters
            mle.fit_with_spark(spark_context=spark_context,
                               samples=samples,
                               max_components=reclust_max_components)

        except Exception:
            print(getCurrentDateTimeString() +
                  " - An exception has been thrown")

    samples.unpersist()
    spark_context.stop()
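The eight-space indentation of Example #4 suggests its body runs inside a loop over covariance types. A hypothetical reconstruction of that outer loop (the list of types and the directory naming are assumptions; only 'full' and 'diagonal' appear in these examples):

import os

for covarType in ['full', 'diagonal']:  # assumed list of covariance types
    auxLogDirName = os.path.join(base_dir, covarType, 'log')        # assumed layout
    auxModelsDirName = os.path.join(base_dir, covarType, 'models')  # assumed layout
    # ... body of Example #4 goes here ...
    pass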
Example #5
    print(getCurrentDateTimeString() + " - we are working with " + str(samples.count()) + " blocks of approximately " + str(clust_batch_size) + " samples")
    # Shows the shape of an example element in the temporary RDD of blocks of samples
    print(getCurrentDateTimeString() + " - " + str(samples.first().shape))

# Gets the dimensionality of the samples, needed to create the MLE object.
dim_x = samples.first().shape[1]


# Models and Logs Directories Creation
createDirectoryIfNotExists(absoluteclusteringLogDir)
createDirectoryIfNotExists(absoluteClusteringModelsDirName)

# Delete data from previous executions
deleteDirectoryData(absoluteclusteringLogDir)
deleteDirectoryData(absoluteClusteringModelsDirName)

# Create MLE class
mle = machine_learning.MLE(covar_type=clust_covar_type,
                           dim=dim_x,
                           log_dir=absoluteclusteringLogDir,
                           models_dir=absoluteClusteringModelsDirName)

# Fit clusters
mle.fit_with_spark(spark_context=spark_context,
                   samples=samples,
                   max_components=clust_max_components)

samples.unpersist()
spark_context.stop()


# Get the current time to measure execution time
executionEndTime = time.time()
if verbose:
    print(getExecutionTimeMsg(executionStartTime, executionEndTime))
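createDirectoryIfNotExists, deleteDirectoryData and getExecutionTimeMsg come from the examples' own utility code; plausible standard-library stand-ins (assumptions, written only to make the snippets self-contained):

import os
import shutil

def createDirectoryIfNotExists(path):
    os.makedirs(path, exist_ok=True)

def deleteDirectoryData(path):
    # Wipe the contents left by previous runs, but keep the directory itself.
    for name in os.listdir(path):
        full = os.path.join(path, name)
        if os.path.isdir(full):
            shutil.rmtree(full)
        else:
            os.remove(full)

def getExecutionTimeMsg(start, end):
    return "Execution took %.1f seconds" % (end - start)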
Example #6
X = pf.transform(X)

N = int(0.8 * len(X))
X_train = X[:N]
Y_train = Y[:N]
X_test = X[N:]
Y_test = Y[N:]

use_gmm = True

if use_gmm:
    K = 74  # set K = None to fit a new mixture; otherwise load the saved model with K components

    if K is None:
        mle = machine_learning.MLE(covar_type='diagonal',
                                   dim=X.shape[1],
                                   log_dir='meteo.1/log',
                                   models_dir='meteo.1/models')
        mle.fit_standalone(samples=X_train, max_components=250, batch_size=10)
    else:
        gmm = machine_learning.GMM()
        gmm.load_from_text(filename='meteo.1/models/gmm-%04d.txt' % K)

        mean_per_class = numpy.zeros([K, Y_train.shape[1]])
        denominator = numpy.zeros(K)
        for t in range(len(X_train)):
            posteriors, logL = gmm.posteriors(X_train[t])
            mean_per_class += numpy.outer(posteriors, Y_train[t])
            denominator += posteriors
        mean_per_class /= denominator.reshape(-1, 1)
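        # What the loop above computes, for each mixture component k:
        #     mean_per_class[k] = sum_t p(k | x_t) * Y_train[t] / sum_t p(k | x_t)
        # i.e. a posterior-weighted (soft) average of the training targets, so
        # each Gaussian component gets its own target estimate.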

        y_predict = numpy.zeros([len(Y_test), Y_test.shape[1]])
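        # The snippet is truncated here; a plausible continuation (an assumption,
        # using only the gmm.posteriors call shown above) predicts each test
        # target as the posterior-weighted combination of the per-component means:
        for t in range(len(X_test)):
            posteriors, logL = gmm.posteriors(X_test[t])
            y_predict[t] = posteriors.dot(mean_per_class)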