Example #1
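# Build the train/eval datasets from the configured path, then build and train the model.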
def main(_):
    config = cfg.Config()
    data_builder = DataBuilder(config)
    train_set, evaluate_set = data_builder.build_data(config.path_data)

    model = Model(config)
    model.build()
    model.train(train_set, evaluate_set)
Example #2
def initialize(online, version, use_cherrypy):
    # configure the logger
    global logger
    if use_cherrypy:
        import cherrypy
        logger = cherrypy.log
    else:
        import multiprocessing
        logger = multiprocessing.get_logger()

    # create a database session
    connection_string = ConnectionString()

    # 'online' arrives as a string flag (e.g. 'True'), hence the text comparison
    if online == 'True':
        engine = create_engine(connection_string.connectUrlonline)
    else:
        engine = create_engine(connection_string.connectUrloffline)

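    # reflect the existing schema into the declarative Base (likely SQLAlchemy automap)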
    Base.prepare(engine, reflect=True)
    session = scoped_session(sessionmaker(engine))

    # store the version id
    global databuilder
    databuilder = DataBuilder(session, version, logger)
Example #3
    def fetch(self, start=None, stop=None):
        """Fetch data from the websites in the interval [start, stop]."""

        start = start if start is not None else 0
        stop = len(self.sites) if stop is None else min(stop, len(self.sites))

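        # Collect one record per site in the requested slice.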
        for i in range(start, stop):
            url = self.sites[i]
            name = self.get_filename(url)

            # Build data output
            builder = DataBuilder()
            builder.append('URL', url)
            self.fetch_site(builder, url)
            self.fetch_alexa(builder, url)
            self.fetch_robots(builder, url)
            self.fetch_pagerank(builder, url)

            self.write_output_file(name, builder.generate())
            self.log_output('Fetched (%s in range %i to %i): %s' %
                            (str(i + 1), start, stop, url))
Example #4
    def createConfig(self, ver, cnf, db, online, folder, use_cherrypy=False):

        if use_cherrypy:
            import cherrypy
            self.logger = cherrypy.log

            # implement part of the logging interface in cherrypy logger
            def log(self, lvl, msg, *args, **kwargs):
                if args:
                    msg = msg % args
                if kwargs and 'exc_info' in kwargs and kwargs['exc_info']:
                    traceback = True
                else:
                    traceback = False
                self.error(msg, '', lvl, traceback)

            def debug(self, msg, *args, **kwargs):
                self.log(logging.DEBUG, msg, *args, **kwargs)

            def info(self, msg, *args, **kwargs):
                self.log(logging.INFO, msg, *args, **kwargs)

            def warning(self, msg, *args, **kwargs):
                self.log(logging.WARNING, msg, *args, **kwargs)

            def critical(self, msg, *args, **kwargs):
                self.log(logging.CRITICAL, msg, *args, **kwargs)

            import types
            self.logger.log      = types.MethodType(log,      self.logger)
            self.logger.debug    = types.MethodType(debug,    self.logger)
            self.logger.info     = types.MethodType(info,     self.logger)
            self.logger.warning  = types.MethodType(warning,  self.logger)
            self.logger.critical = types.MethodType(critical, self.logger)

        else:
            formatter = logging.Formatter("[%(asctime)s - %(levelname)s/%(processName)s] %(name)s: %(message)s")
            handler   = logging.StreamHandler()
            handler.setFormatter(formatter)
            self.logger = multiprocessing.get_logger()
            self.logger.setLevel(logging.INFO)
            self.logger.addHandler(handler)

        version = None
        release = None
        try:
            version = DataBuilder.getRequestedVersion(ver, cnf, db, self.logger)
        except Exception as e:
            msg = 'ERROR: Query getRequestedVersion Error: ' + e.args[0]
            self.logger.error(msg)

        if version is None:
            print("\nCould Not Find The Requested Version.\n")
            return

        self.data_builder = DataBuilder(db, version, self.logger)

        try:
            release = self.queries.getRelease(version.id_release, db, self.logger)
        except Exception as e:
            msg = 'ERROR: Query getRelease Error: ' + e.args[0]
            self.logger.error(msg)
        if release is None:
            return

        # Adding File's Headers
        self.logger.info('retrieving header...')
        header = self.writeHeader(version.name, release.releasetag, version.processname)
        self.logger.info('done')

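        # (output key, query name) pairs; task.worker runs each query and returns its section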
        tasks = [
            ('modules',        'getModules'),
            ('paths',          'getPaths'),
            ('sequences',      'getSequences'),
            ('datasets',       'getDatasetsPaths'),
            ('psets',          'getGlobalPsets'),
            ('streams',        'getStreams'),
            ('source',         'getSource'),
            ('esproducers',    'getESModules'),
            ('essources',      'getESSources'),
            ('services',       'getServices'),
            ('outputmodules',  'getOutputModules'),
            ('endpaths',       'getEndPaths'),
            ('schedule',       'getSchedule'),
        ]

        # build each part of the configuration
        parts = {}
        import task

        if self.threads == 1:
            # single threaded version
            task.initialize(online, version, use_cherrypy)
            task_iterator = map(task.worker, tasks)
        else:
            # multiprocess version
            pool = multiprocessing.Pool(processes=self.threads,
                                        initializer=task.initialize,
                                        initargs=(online, version, use_cherrypy))
            task_iterator = pool.imap_unordered(task.worker, tasks)

        for (key, value, error) in task_iterator:
            parts[key] = value
            if error:
                self.logger.critical(error)
                return None

        # cleanup the task pool
        if self.threads != 1:
            pool.close()
            pool.join()

        # combine all parts
        data = \
            header + \
            parts['psets'] + \
            parts['streams'] + \
            parts['datasets'] + \
            parts['source'] + \
            parts['essources'] + \
            parts['esproducers'] + \
            parts['services'] + \
            parts['modules'] + \
            parts['outputmodules'] + \
            parts['sequences'] + \
            parts['paths'] + \
            parts['endpaths'] + \
            parts['schedule']

        filename = folder + '/HLT.py'
        try:
            with open(filename, 'w') as output_file:
                output_file.write(data)
            return filename
        except Exception as e:
            msg = 'ERROR: Writing to file %s: %s' % (filename, e.args[0])
            self.logger.error(msg)
            return None
Example #5
from data_builder import DataBuilder

if __name__ == '__main__':

    dataBuilder = DataBuilder()
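    # getLastUpdated() appears to return a three-part date, e.g. (year, month, day)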
    lastUpdated = dataBuilder.getLastUpdated()
    print("Database was last updated " + str(lastUpdated[0]) + "." +
          str(lastUpdated[1]) + "." + str(lastUpdated[2]))
    answer = input("Type 'Y' to update database: ")
    if answer == "Y":
        dataBuilder.buildDatabaseFromDate(lastUpdated[0], lastUpdated[1],
                                          lastUpdated[2])
        dataBuilder.updateLastUpdated()
        print("Database updated!")
Example #6
from flask import Flask, request
from data_builder import DataBuilder

app = Flask(__name__)

# ROUTES FOR DATA
@app.route('/trackerdata', methods=['POST'])
def tracker_data():
    return d.tracker_data()

@app.route('/regiondata', methods=['POST'])
def region_data():
    region = request.args.get('region')
    return d.region_data(region)

@app.route('/totaldata', methods=['POST'])
def total_data():
    return d.total_data()

# ROUTES FOR NEWS
@app.route('/news', methods=['POST'])
def method_name():
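    # stub: a Flask view must return a response, so this route will fail until implemented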
    pass

# Create the DataBuilder at import time so the routes can reach it even when
# the app is run by a WSGI server rather than directly.
d = DataBuilder()

if __name__ == '__main__':
    app.run()
Example #7
class RNNWrapper:
    def __init__(self, trading_data):
        self.config = trading_data.config

        self.company = trading_data.company

        data_set = pd.read_csv(trading_data.csv_dataset)

        self.data_builder = DataBuilder(data_set, trading_data.entry_columns, 
                                        trading_data.prediction_column)
        self.data_builder.build_data()

    def build(self):
        training_entry_set = self.data_builder.training_entry_set
        input_shape = (training_entry_set.shape[1], training_entry_set.shape[2])

        self.sequential = Sequential()
        # Stack one LSTM layer per input column; every layer except the last
        # returns the full sequence so it can feed the next LSTM.
        for i in range(input_shape[1]):
            last_iteration = i == input_shape[1] - 1
            self.sequential.add(LSTM(units=self.config.layer_units,
                                     recurrent_activation=self.config.activation_function,
                                     return_sequences=not last_iteration,
                                     input_shape=input_shape))
        self.sequential.add(Dense(units=1))

        optimizer = Adam(learning_rate=self.config.learning_rate,
                         beta_1=self.config.beta_1,
                         beta_2=self.config.beta_2)
        self.sequential.compile(optimizer=optimizer, loss=self.config.loss)

    def fit(self):
        entry_set = self.data_builder.training_entry_set
        result_set = self.data_builder.training_result_set

        rlrop = ReduceLROnPlateau(monitor=self.config.monitor, 
                                  factor=self.config.factor, 
                                  patience=self.config.patiente)

        with tf.device(self.config.device):
            self.sequential.fit(entry_set,
                                result_set,
                                epochs=self.config.epochs,
                                batch_size=self.config.batch_size,
                                callbacks=[rlrop])

    def predict(self):
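        # run the model on the test split, then map scaled outputs back to price units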
        entry_set = self.data_builder.test_entry_set
        scaler_prediction = self.sequential.predict(entry_set)
        self.predicted_results = self.data_builder.inverse_transform(scaler_prediction)
    
    def load_real_results(self): 
        self.real_results = self.data_builder.real_test_results

    def to_image(self):
        plt.clf()
        plt.plot(self.real_results, color='red', label='Real Results')
        plt.plot(self.predicted_results, color='blue', label='Predicted Results')
        plt.title(self.company + ' Prediction')
        plt.xlabel('Time')
        plt.ylabel(self.company + ' Price')
        plt.legend()

        return plt

    def save(self, path):
        self.sequential.save(path)
Example #8
    def createConfig(self,
                     ver,
                     cnf,
                     db,
                     online,
                     folder,
                     request=None,
                     use_cherrypy=False):

        if use_cherrypy:
            import cherrypy
            self.logger = cherrypy.log

            cache_session = request.db_cache

            if online == 'True' or online == 'true':
                src = 1
            else:
                src = 0

            if cnf not in (-2, -1):
                cnf = cache.folMappingDictGetExternal(cnf, src, "cnf",
                                                      cache_session,
                                                      self.logger)

            # implement part of the logging interface in cherrypy logger
            def log(self, lvl, msg, *args, **kwargs):
                if args:
                    msg = msg % args
                if kwargs and 'exc_info' in kwargs and kwargs['exc_info']:
                    traceback = True
                else:
                    traceback = False
                self.error(msg, '', lvl, traceback)

            def debug(self, msg, *args, **kwargs):
                self.log(logging.DEBUG, msg, *args, **kwargs)

            def info(self, msg, *args, **kwargs):
                self.log(logging.INFO, msg, *args, **kwargs)

            def warning(self, msg, *args, **kwargs):
                self.log(logging.WARNING, msg, *args, **kwargs)

            def critical(self, msg, *args, **kwargs):
                self.log(logging.CRITICAL, msg, *args, **kwargs)

            import types
            self.logger.log = types.MethodType(log, self.logger)
            self.logger.debug = types.MethodType(debug, self.logger)
            self.logger.info = types.MethodType(info, self.logger)
            self.logger.warning = types.MethodType(warning, self.logger)
            self.logger.critical = types.MethodType(critical, self.logger)

        else:
            formatter = logging.Formatter(
                "[%(asctime)s - %(levelname)s/%(processName)s] %(name)s: %(message)s"
            )
            handler = logging.StreamHandler()
            handler.setFormatter(formatter)
            self.logger = multiprocessing.get_logger()
            self.logger.setLevel(logging.INFO)
            self.logger.addHandler(handler)

        version = None
        release = None
        try:
            version = DataBuilder.getRequestedVersion(ver, cnf, db,
                                                      self.logger)
        except Exception as e:
            msg = 'ERROR: Query getRequestedVersion Error: ' + e.args[0]
            self.logger.error(msg)

        if version is None:
            print("\nCould Not Find The Requested Version.\n")
            return

        self.data_builder = DataBuilder(db, version, self.logger)

        try:
            release = self.queries.getRelease(version.id_release, db,
                                              self.logger)
        except Exception as e:
            msg = 'ERROR: Query getRelease Error: ' + e.args[0]
            self.logger.error(msg)
        if release is None:
            return

        # Adding File's Headers
        self.logger.info('retrieving header...')
        header = self.writeHeader(version.name, release.releasetag,
                                  version.processname)
        self.logger.info('done')

        tasks = [
            ('modules', 'getModules'),
            ('paths', 'getPaths'),
            ('sequences', 'getSequences'),
            ('datasets', 'getDatasetsPaths'),
            ('psets', 'getGlobalPsets'),
            ('streams', 'getStreams'),
            ('source', 'getSource'),
            ('esproducers', 'getESModules'),
            ('essources', 'getESSources'),
            ('services', 'getServices'),
            ('outputmodules', 'getOutputModules'),
            ('endpaths', 'getEndPaths'),
            ('schedule', 'getSchedule'),
        ]

        # build each part of the configuration
        parts = {}
        import task

        if self.threads == 1:
            # single threaded version
            task.initialize(online, version, use_cherrypy)
            task_iterator = map(task.worker, tasks)
        else:
            # multiprocess version
            pool = multiprocessing.Pool(processes=self.threads,
                                        initializer=task.initialize,
                                        initargs=(online, version,
                                                  use_cherrypy))
            task_iterator = pool.imap_unordered(task.worker, tasks)

        for (key, value, error) in task_iterator:
            parts[key] = value
            if error:
                self.logger.critical(error)
                return None

        # cleanup the task pool
        if self.threads != 1:
            pool.close()
            pool.join()

        # combine all parts
        data = \
            header + \
            parts['psets'] + \
            parts['streams'] + \
            parts['datasets'] + \
            parts['source'] + \
            parts['essources'] + \
            parts['esproducers'] + \
            parts['services'] + \
            parts['modules'] + \
            parts['outputmodules'] + \
            parts['sequences'] + \
            parts['paths'] + \
            parts['endpaths'] + \
            parts['schedule']

        filename = folder + '/HLT.py'
        try:
            with open(filename, 'w') as output_file:
                output_file.write(data)
            return filename
        except Exception as e:
            msg = 'ERROR: Writing to file %s: %s' % (filename, e.args[0])
            self.logger.error(msg)
            return None
Example #9
import os
import sys
sys.path.append("../lib")
import utils

from config import Config
from downloader import Downloader
from data_builder import DataBuilder

config = Config("/usr/dev/speech-separation/config.yaml")
#downloader = Downloader(config)
data_builder = DataBuilder(config, config.data.num_workers * 2)
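
# One-time setup (directories, downloads, embeddings) appears to have been run
# already and is left commented out; only the audio build is executed below.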

#utils.make_dirs("../data")
#utils.make_dirs("../data/audio")
#utils.make_dirs("../data/frames")

# Download data
#downloader.download_data("../data/csv/avspeech_train.csv", 0, 55000, wait_tasks=False)
#downloader.download_data("../data/csv/avspeech_test.csv", 0, 1000)
#downloader.download_noise_data("../data/csv/noise.csv", 0, 1000)

# Build data
#data_builder.build_embs(wait_tasks=False)
#data_builder.build_embs(is_train=False)

data_builder.build_audio(38000, wait_tasks=False)
data_builder.build_audio(1000, is_train=False)
Example #10
ENTRY_COLUMNS = ['open', 'close']


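# Load a saved model, rebuild the test split, and plot predicted vs. real prices.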
def predict(rnn, data_builder):
    data_builder.build_data()
    entry_set = data_builder.test_entry_set
    scaler_prediction = rnn.predict(entry_set)

    return data_builder.inverse_transform(scaler_prediction)


def to_image(predicted_results, real_results, operation, company):
    plt.plot(real_results, color='red', label='Real Results')
    plt.plot(predicted_results, color='blue', label='Predicted Results')
    plt.title(company + ' Test Prediction')
    plt.xlabel('Time')
    plt.ylabel(company + ' Price')
    plt.legend()
    plt.savefig(OUTPUT_DATA + '/' + operation + '.png')


operation = MODEL_FILE.split('/')[-1].split('.')[0]
company = DATA_SET.split('/')[-1].split('.')[0]
pathlib.Path(OUTPUT_DATA).mkdir(parents=True, exist_ok=True)

rnn = tf.keras.models.load_model(MODEL_FILE)
csv_dataset = pd.read_csv(DATA_SET)
data_builder = DataBuilder(csv_dataset, ENTRY_COLUMNS, operation)
predicted_results = predict(rnn, data_builder)
to_image(predicted_results, data_builder.real_test_results, operation, company)