Example No. 1
    def predict(self, point, last_price):
        """Point debe ser un Data Frame de Pandas con las información
		necesaria para realizar la predicción."""
        # 1. Standardize point with training mean and standard deviation.
        test_data = self.__standardize_features_for_test(
            point, self.columns_to_standardize, self.column_means,
            self.column_stds)
        # 2. Add it to the data.
        df = pd.concat([self.data, test_data])
        # 3. Windowize.
        fmt = DataFormatter()
        X, Y = fmt.windowize_series(df.as_matrix(),
                                    size=self.input_window_size,
                                    column_indexes=self.columns_to_windowize)
        # 4. Extract the last window.
        last_window = fmt.get_last_window(
            df.as_matrix(),
            size=self.input_window_size,
            column_indexes=self.columns_to_windowize)
        # 5. Compute the error.
        train_score = self.model.evaluate(X, Y, verbose=0)
        train_score = np.array([
            train_score[0],
            np.sqrt(train_score[0]), train_score[1], train_score[2] * 100
        ])
        # 6. Make the prediction (adding the batch dimension Keras expects,
        #    as the variant of this method later in this listing does).
        last_window = last_window[None, :]
        prediction = self.model.predict(last_window)
        # 7. Compute the prediction intervals.
        pred_upper = prediction + 1.96 * train_score[1]
        pred_lower = prediction - 1.96 * train_score[1]
        # 8. Transform back the prediction.
        prediction = last_price * np.exp(prediction)
        pred_upper = last_price * np.exp(pred_upper)
        pred_lower = last_price * np.exp(pred_lower)
        return prediction, [pred_lower, pred_upper]
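Note that the 95% interval is built in log-return space (prediction ± 1.96 · RMSE) and only then mapped back to prices, so the bounds are asymmetric around the point forecast. A quick numeric check of the back-transform, with all values illustrative:

import numpy as np

last_price = 100.0   # illustrative last closing price
log_return = 0.01    # predicted log-return from the model
rmse = 0.02          # np.sqrt(MSE) reported by model.evaluate

price = last_price * np.exp(log_return)                 # ~101.01
upper = last_price * np.exp(log_return + 1.96 * rmse)   # ~105.04
lower = last_price * np.exp(log_return - 1.96 * rmse)   # ~97.12
print(price, lower, upper)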
Example No. 2
def stormDump(dmonEndpoint, qgte, qlte, qsize, qinterval):
    '''
    :param dmonEndpoint: -> DMON endpoint
    :param qgte: -> greater than timestamp
    :param qlte: -> less than timestamp
    :param qsize: -> query size
    :param qinterval: -> query interval
    :return: None; results are written to CSV via dict2csv
    '''
    dmonConnector = Connector(dmonEndpoint)
    stormTopology = dmonConnector.getStormTopology()

    bolts = stormTopology['bolts']
    spouts = stormTopology['spouts']
    print "Detected %s bolts" % str(bolts)
    print "Detected %s spouts" % str(spouts)

    qConstructor = QueryConstructor()
    dformat = DataFormatter(dataDir)

    storm, storm_file = qConstructor.stormString()
    print "Query string -> %s" % storm
    qstorm = qConstructor.stormQuery(storm, qgte, qlte, qsize, qinterval, bolts=bolts, spouts=spouts)
    print "Query -> %s" % qstorm
    gstorm = dmonConnector.aggQuery(qstorm)

    print "Response:"
    print gstorm
    dformat.dict2csv(gstorm, qstorm, storm_file)
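A usage sketch for the dump above, reusing the endpoint and query values that appear in the later examples of this listing:

stormDump(dmonEndpoint='85.120.206.27',
          qgte=1477911300000,   # window start, epoch milliseconds
          qlte=1477914720000,   # window end, epoch milliseconds
          qsize=0,
          qinterval='10s')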
Example No. 3
 def process_csv(self, csv_line, updown):
     data = csv_line.split(',')
     if self.cli:
         print ' %s sec \t%s\t%s/sec' % \
               (data[self.TIME_RANGE],
                DataFormatter.format_bytes(data[IPerfTest.TRANSFERED]),
                DataFormatter.format_bits(data[IPerfTest.SPEED])
               )
     if updown == 'up':
         self.process_up_csv(data)
     else:
         self.process_down_csv(data)
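DataFormatter.format_bytes and format_bits themselves are not shown in this listing; a hypothetical implementation of the byte formatter, assuming base-1024 scaling and these unit labels, might look like:

def format_bytes(num_bytes):
    # Hypothetical helper: render a raw byte count as a human-readable string.
    num = float(num_bytes)
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        if num < 1024.0:
            return '%.2f %s' % (num, unit)
        num /= 1024.0
    return '%.2f PB' % num

print(format_bytes(1536))  # -> 1.50 KB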
Example No. 4
 def fit_model(self, epochs=200, verbose=0):
     """Entrenar el modelo para producción."""
     # Reset the stored standardization statistics.
     self.column_means = {}
     self.column_stds = {}
     # Windowize dataset
     fmt = DataFormatter()
     self.X, self.Y = fmt.windowize_series(
         self.data.as_matrix(),
         size=self.input_window_size,
         column_indexes=self.columns_to_windowize)
     self.model.fit(self.X,
                    self.Y,
                    epochs=epochs,
                    batch_size=32,
                    verbose=verbose)
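windowize_series itself is not shown here; conceptually it slices the series matrix into overlapping input windows X with next-step targets Y. A minimal NumPy sketch of that idea (the exact window layout DataFormatter uses is an assumption):

import numpy as np

def windowize(series, size):
    # Each row of X holds `size` consecutive values; Y holds the value
    # that immediately follows each window.
    X = np.array([series[i:i + size] for i in range(len(series) - size)])
    Y = series[size:]
    return X, Y

X, Y = windowize(np.arange(10, dtype=float), size=3)
print(X[0], Y[0])  # [0. 1. 2.] 3.0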
Example No. 5
def cassandraDump(dmonEndpoint, qgte, qlte, qsize, qinterval):
    '''
    :param dmonEndpoint: -> DMON endpoint
    :param qgte: -> greater than timestamp
    :param qlte: -> less than timestamp
    :param qsize: -> query size
    :param qinterval: -> query interval
    :return: None; results are written to CSV via dict2csv
    '''
    dmonConnector = Connector(dmonEndpoint)
    qConstructor = QueryConstructor()
    dformat = DataFormatter(dataDir)

    roles = dmonConnector.roles()
    cassandra_hosts = []
    for el in roles['Nodes']:
        for k, v in el.iteritems():
            if 'cassandra' in v:
                print 'Found cassandra role at %s' % k
                cassandra_hosts.append(k)

    for host in cassandra_hosts:
        print "-" * 50
        cassandra, cassandra_file = qConstructor.cassandraCounterString(host=host)
        print "Query string -> %s" % cassandra
        qcassandra = qConstructor.cassandraQuery(cassandra, qgte, qlte, qsize, qinterval)
        print "Query -> %s" % qcassandra
        gcassandra = dmonConnector.aggQuery(qcassandra)

        print "Response:"
        print gcassandra
        dformat.dict2csv(gcassandra, qcassandra, cassandra_file)

        cassandragauge, cassandragauge_file = qConstructor.cassandraGaugeString(host=host)

        qcassandragauge = qConstructor.cassandraQuery(cassandragauge, qgte, qlte, qsize, qinterval)
        print "Query -> %s" % qcassandragauge
        gcassandragauge = dmonConnector.aggQuery(qcassandragauge)

        print "Response:"
        print gcassandragauge
        dformat.dict2csv(gcassandragauge, qcassandragauge, cassandragauge_file)
        print "-" * 50
Example No. 6
    def predict(self, point=None):
        """Point debe ser un Data Frame de Pandas con las información
		necesaria para realizar la predicción."""
        # 1. Standardize point with training mean and standard deviation.
        # 2. Add it to the data.
        if point is None:
            df = self.data
        else:
            test_data = self.__standardize_features_for_test(
                point, self.columns_to_standardize, self.column_means,
                self.column_stds)
            df = pd.concat([self.data, test_data])
        # 3. Windowize.
        fmt = DataFormatter()
        X, Y = fmt.windowize_series(df.as_matrix(),
                                    size=self.input_window_size,
                                    column_indexes=self.columns_to_windowize)
        # 4. Extract the last window.
        last_window = fmt.get_last_window(
            df.as_matrix(),
            size=self.input_window_size,
            column_indexes=self.columns_to_windowize)
        last_window = last_window[None, :]
        # 5. Compute the error.
        train_score = self.model.evaluate(X, Y, verbose=0)
        train_score = np.array([
            train_score[0],
            np.sqrt(train_score[0]), train_score[1], train_score[2] * 100
        ])
        # 6. Make the prediction.
        prediction = np.squeeze(self.model.predict(last_window))
        # 7. Compute the prediction intervals.
        pred_upper = prediction + 1.96 * train_score[1]
        pred_lower = prediction - 1.96 * train_score[1]
        # 8. Revert the standardization.
        prediction = prediction * self.column_stds[
            u'Close'] + self.column_means[u'Close']
        pred_upper = pred_upper * self.column_stds[
            u'Close'] + self.column_means[u'Close']
        pred_lower = pred_lower * self.column_stds[
            u'Close'] + self.column_means[u'Close']
        return prediction, pred_lower, pred_upper
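Unlike the log-return variant above, this version reverts the z-score directly: x = z * sigma + mu with the training statistics of 'Close'. A quick check with illustrative numbers:

z = 0.5                    # standardized model output
mu, sigma = 150.0, 10.0    # training mean and std of 'Close'
print(z * sigma + mu)      # 155.0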
Example No. 7
 def get_summary(self):
     try:
         summary = 'Upload:\n'
         summary += '  %s: %s\t%s: %s\n' % \
               ('Transferred'.ljust(11), DataFormatter.format_bytes(self.get_up_transferred()),
                'Speed'.ljust(11), DataFormatter.format_bits(self.get_up_speed()))
         summary += '  %s: %s\t%s: %s\n' % \
               ('Min'.ljust(11), DataFormatter.format_bits(self.get_up_speed_min()),
                'Max'.ljust(11), DataFormatter.format_bits(self.get_up_speed_max()))
         summary += '  %s: %2.2f%%\n' % ('Jitter'.ljust(11), self.get_up_jitter())
         summary += 'Download:\n'
         summary += '  %s: %s\t%s: %s\n' % \
               ('Transferred'.ljust(11), DataFormatter.format_bytes(self.get_down_transferred()),
                'Speed'.ljust(11), DataFormatter.format_bits(self.get_down_speed()))
         summary += '  %s: %s\t%s: %s\n' % \
               ('Min'.ljust(11), DataFormatter.format_bits(self.get_down_speed_min()),
                'Max'.ljust(11), DataFormatter.format_bits(self.get_down_speed_max()))
         summary += '  %s: %2.2f%%\n' % ('Jitter'.ljust(11), self.get_down_jitter())
         return summary
     except Exception:
         # Stats may be missing if the test failed; no summary in that case.
         return None
Example No. 8
 def print_summary(self):
     print '-' * IPerfTest.PRINT_WIDTH
     print '|%s|' % self.description.center(IPerfTest.PRINT_WIDTH - 2)
     print '|%s|' % 'Summary'.center(IPerfTest.PRINT_WIDTH - 2)
     print '-' * IPerfTest.PRINT_WIDTH
     try:
         print 'Upload:'
         print '  %s: %s\t%s: %s' % \
               ('Transferred'.ljust(11), DataFormatter.format_bytes(self.get_up_transferred()),
                'Speed'.ljust(11), DataFormatter.format_bits(self.get_avg_up_speed()))
         print '  %s: %s\t%s: %s' % \
               ('Min'.ljust(11), DataFormatter.format_bits(self.total_up_min),
                'Max'.ljust(11), DataFormatter.format_bits(self.total_up_max))
         print '  %s: %2.2f%%' % ('Jitter'.ljust(11), self.get_up_jitter())
         print 'Download:'
         print '  %s: %s\t%s: %s' % \
               ('Transferred'.ljust(11), DataFormatter.format_bytes(self.get_down_transferred()),
                'Speed'.ljust(11), DataFormatter.format_bits(self.get_avg_down_speed()))
         print '  %s: %s\t%s: %s' % \
               ('Min'.ljust(11), DataFormatter.format_bits(self.total_down_min),
                'Max'.ljust(11), DataFormatter.format_bits(self.total_down_max))
         print '  %s: %2.2f%%' % ('Jitter'.ljust(11), self.get_down_jitter())
     except Exception:
         # Stats may be missing if the test failed; skip the detail lines.
         pass
Example No. 9
import os
import tempfile
import traceback
import weka.core.jvm as jvm
import edeweka.helper as helper
from weka.clusterers import Clusterer
import weka.core.converters as converters
import weka.core.serialization as serialization
from dataformatter import DataFormatter
import weka.core.packages as packages

dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
modelDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models')

dformat = DataFormatter(dataDir)

dformat.dict2arff(os.path.join(dataDir, 'System.csv'),
                  os.path.join(dataDir, 'System.arff'))

#Arff_file = os.path.join(dataDir, 'System.arff')

jvm.start(packages=True)

data = converters.load_any_file(os.path.join(dataDir, 'System.arff'))
clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                      options=["-N", "10", "-S", "10"])
clusterer.build_clusterer(data)

# print clusterer
# cluster the data
# for inst in data:
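A plausible continuation of the commented-out loop, using python-weka-wrapper calls that do exist (cluster_instance, distribution_for_instance, serialization.write); this is a sketch, not the author's original code:

for inst in data:
    cl = clusterer.cluster_instance(inst)              # assigned cluster index
    dist = clusterer.distribution_for_instance(inst)   # cluster membership distribution
    print("cluster=" + str(cl) + ", distribution=" + str(dist))

# Persist the trained clusterer and shut the JVM down cleanly.
serialization.write(os.path.join(modelDir, 'kmeans.model'), clusterer.jobject)
jvm.stop()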
Example No. 10
    def test_model(self, n_splits=9, cv_runs=10, epochs=100, verbose=2):
        """Evaluación del modelo usando validación cruzada
		hacia adelante."""
        from sklearn.model_selection import TimeSeriesSplit

        self.metrics = ['MSE', 'RMSE', 'MAE', 'MAPE']
        train_scores = np.zeros((cv_runs, n_splits, len(self.metrics)))
        test_scores = np.zeros((cv_runs, n_splits, len(self.metrics)))
        fmt = DataFormatter()
        tscv = TimeSeriesSplit(n_splits=n_splits)
        for j in xrange(cv_runs):
            # print('\nCross-validation run %i' % (j+1))
            i = 1
            for train_index, test_index in tscv.split(
                    self.data['LogReturn'].values):
                # Split the dataset into training and test folds
                train_df = self.data.loc[train_index]
                test_df = self.data.loc[test_index]
                # Standardize the features with training-set statistics
                if len(self.columns_to_standardize) != 0:
                    train_data, training_means, training_stds = self.__standardize_features(
                        train_df, self.columns_to_standardize)
                    test_data = self.__standardize_features_for_test(
                        test_df, self.columns_to_standardize, training_means,
                        training_stds)
                else:
                    train_data = train_df
                    test_data = test_df
                # Extract data windows
                trainX, trainY = fmt.windowize_series(
                    train_data.as_matrix(),
                    size=self.input_window_size,
                    column_indexes=self.columns_to_windowize)
                testX, testY = fmt.windowize_series(
                    test_data.as_matrix(),
                    size=self.input_window_size,
                    column_indexes=self.columns_to_windowize)
                # Fit the model
                # print('Fold %i' % (i))
                self.model.fit(trainX,
                               trainY,
                               epochs=epochs,
                               batch_size=32,
                               validation_data=(testX, testY),
                               verbose=verbose)
                # Evaluate each fold of the walk-forward cross-validation
                train_score = self.model.evaluate(trainX,
                                                  trainY,
                                                  verbose=verbose)
                train_score = np.array([
                    train_score[0],
                    np.sqrt(train_score[0]), train_score[1],
                    train_score[2] * 100
                ])
                test_score = self.model.evaluate(testX, testY, verbose=verbose)
                test_score = np.array([
                    test_score[0],
                    np.sqrt(test_score[0]), test_score[1], test_score[2] * 100
                ])
                # print('Train Score: %.5f MSE, %.5f RMSE, %.5f MAE, %.5f%% MAPE' % (train_score[0], train_score[1], train_score[2], train_score[3]))
                # print('Test Score: %.5f MSE, %.5f RMSE, %.5f MAE, %.5f%% MAPE\n' % (test_score[0], test_score[1], test_score[2], test_score[3]))
                # [0: MSE, 1: RMSE, 2: MAE, 3: MAPE]
                train_scores[j, i - 1, :] = train_score
                test_scores[j, i - 1, :] = test_score
                i += 1
        self.train_results = train_scores.mean(axis=0).mean(axis=0)
        self.test_results = test_scores.mean(axis=0).mean(axis=0)
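TimeSeriesSplit yields expanding training windows, so each fold only ever tests on data that comes after its training data; a small demonstration of the indices it produces:

import numpy as np
from sklearn.model_selection import TimeSeriesSplit

for train_idx, test_idx in TimeSeriesSplit(n_splits=3).split(np.arange(8)):
    print("train:", train_idx, "test:", test_idx)
# train: [0 1] test: [2 3]
# train: [0 1 2 3] test: [4 5]
# train: [0 1 2 3 4 5] test: [6 7]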
Example No. 11
import os, sys
from dataformatter import DataFormatter
from pyQueryConstructor import QueryConstructor
from edeconnector import Connector

if __name__ == '__main__':
    dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    prometheus_endpoint = '194.102.62.155'
    prometheus_port = '9090'
    print("Collecting data from Monitoring at: {}".format(prometheus_endpoint))
    prometheus_query = {"query": '''{__name__=~"node.+"}[80m]'''}
    # prometheus_query = qConstructor.pr_query_node(time="1h")
    edeConnector = Connector(prEndpoint=prometheus_endpoint,
                             MInstancePort=prometheus_port)

    test = edeConnector.pr_targets()
    print("Current target information:")
    print(test)
    test1 = edeConnector.pr_labels('cpu')
    print(test1)
    test2 = edeConnector.pr_status()
    print("Status information")
    print(test2)
    print("Executing query ....")
    test3 = edeConnector.pr_query(query=prometheus_query)
    dformat = DataFormatter(dataDir)
    print("Query completed ....")
    print("Saving ...")
    test_format = dformat.prtoDF(test3, checkpoint=True, verbose=True)
    print("Saved")
Example No. 12
    dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    modelDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models')

    # Standard query values
    # qte = 1475842980000
    # qlte = 1475845200000
    qgte = 1477911300000
    qlte = 1477914720000

    qsize = 0
    qinterval = "10s"
    dmonEndpoint = '85.120.206.27'

    dmonConnector = Connector(dmonEndpoint)
    qConstructor = QueryConstructor()
    dformat = DataFormatter(dataDir)

    nodeList = dmonConnector.getNodeList()
    interval = dmonConnector.getInterval()

    if int(qinterval[:-1]) < interval['System']:
        logger.warning('[%s] : [WARN] Query interval is smaller than the System monitoring interval!',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))


    # per slave unique process name list
    nodeProcessReduce = {}
    nodeProcessMap = {}

    # Get host based metrics
    for node in nodeList:
Example No. 13
    modelDir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'models')

    # Standard query values
    # qte = 1475842980000
    # qlte = 1475845200000
    qgte = 1477911300000
    qlte = 1477914720000

    qsize = 0
    qinterval = "10s"
    dmonEndpoint = '85.120.206.27'

    dmonConnector = Connector(dmonEndpoint)
    qConstructor = QueryConstructor()
    dformat = DataFormatter(dataDir)

    nodeList = dmonConnector.getNodeList()
    interval = dmonConnector.getInterval()

    if int(qinterval[:-1]) < interval['System']:
        logger.warning(
            '[%s] : [WARN] Query interval is smaller than the System monitoring interval!',
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

    # per slave unique process name list
    nodeProcessReduce = {}
    nodeProcessMap = {}

    # Get host based metrics
    for node in nodeList:
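        # The example is truncated here; a plausible loop body, following the
        # storm/cassandra dump pattern above. The *String/*Query helper names
        # below are assumptions, not confirmed QueryConstructor API:
        memory, memory_file = qConstructor.systemMemoryString(host=node)               # hypothetical
        qmemory = qConstructor.systemMemoryQuery(memory, qgte, qlte, qsize, qinterval) # hypothetical
        gmemory = dmonConnector.aggQuery(qmemory)
        dformat.dict2csv(gmemory, qmemory, memory_file)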