def main(_):
    config = cfg.Config()
    data_builder = DataBuilder(config)
    train_set, evaluate_set = data_builder.build_data(config.path_data)
    model = Model(config)
    model.build()
    model.train(train_set, evaluate_set)
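
# Hypothetical entry point (not in the original source): the main(_)
# signature matches the absl.app convention, so a minimal sketch under
# that assumption:
if __name__ == '__main__':
    from absl import app
    app.run(main)
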
def initialize(online, version, use_cherrypy):
    # configure the logger
    global logger
    if use_cherrypy:
        import cherrypy
        logger = cherrypy.log
    else:
        import multiprocessing
        logger = multiprocessing.get_logger()

    # create a database session
    connectionString = ConnectionString()
    if online == 'True':
        engine = create_engine(connectionString.connectUrlonline)
    else:
        engine = create_engine(connectionString.connectUrloffline)
    Base.prepare(engine, reflect=True)
    session = scoped_session(sessionmaker(engine))

    # store the version id
    global databuilder
    databuilder = DataBuilder(session, version, logger)
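
# Hypothetical sketch (not in the original source) of the task.worker that
# createConfig() maps over its (key, query-method) task list: the worker is
# presumed to dispatch the named query on the module-level DataBuilder that
# initialize() sets up, and to return the (key, value, error) triple the
# caller unpacks.
def worker(task):
    key, method = task
    try:
        value = getattr(databuilder, method)()
        return key, value, None
    except Exception as e:
        return key, None, 'task %s failed: %s' % (key, e)
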
def fetch(self, start=None, stop=None):
    """Fetch data from the websites in the interval [start, stop)."""
    start = start if start is not None else 0
    stop = min(stop, len(self.sites)) if stop is not None else len(self.sites)
    for i in range(start, stop):
        url = self.sites[i]
        name = self.get_filename(url)
        # Build data output
        builder = DataBuilder()
        builder.append('URL', url)
        self.fetch_site(builder, url)
        self.fetch_alexa(builder, url)
        self.fetch_robots(builder, url)
        self.fetch_pagerank(builder, url)
        self.write_output_file(name, builder.generate())
        self.log_output('Fetched (%s in range %i to %i): %s'
                        % (str(i + 1), start, stop, url))
def createConfig(self, ver, cnf, db, online, folder, use_cherrypy=False):
    if use_cherrypy:
        import cherrypy
        self.logger = cherrypy.log

        # implement part of the logging interface on the cherrypy logger
        def log(self, lvl, msg, *args, **kwargs):
            if args:
                msg = msg % args
            traceback = bool(kwargs.get('exc_info'))
            self.error(msg, '', lvl, traceback)

        def debug(self, msg, *args, **kwargs):
            self.log(logging.DEBUG, msg, *args, **kwargs)

        def info(self, msg, *args, **kwargs):
            self.log(logging.INFO, msg, *args, **kwargs)

        def warning(self, msg, *args, **kwargs):
            self.log(logging.WARNING, msg, *args, **kwargs)

        def critical(self, msg, *args, **kwargs):
            self.log(logging.CRITICAL, msg, *args, **kwargs)

        import types
        self.logger.log = types.MethodType(log, self.logger)
        self.logger.debug = types.MethodType(debug, self.logger)
        self.logger.info = types.MethodType(info, self.logger)
        self.logger.warning = types.MethodType(warning, self.logger)
        self.logger.critical = types.MethodType(critical, self.logger)
    else:
        formatter = logging.Formatter(
            "[%(asctime)s - %(levelname)s/%(processName)s] %(name)s: %(message)s")
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        self.logger = multiprocessing.get_logger()
        self.logger.setLevel(logging.INFO)
        self.logger.addHandler(handler)

    version = None
    release = None
    try:
        version = DataBuilder.getRequestedVersion(ver, cnf, db, self.logger)
    except Exception as e:
        msg = 'ERROR: Query getRequestedVersion Error: ' + e.args[0]
        self.logger.error(msg)
    if version is None:
        print("\nCould Not Find The Requested Version.\n")
        return

    self.data_builder = DataBuilder(db, version, self.logger)
    try:
        release = self.queries.getRelease(version.id_release, db, self.logger)
    except Exception as e:
        msg = 'ERROR: Query getRelease Error: ' + e.args[0]
        self.logger.error(msg)

    # Adding File's Headers
    self.logger.info('retrieving header...')
    header = self.writeHeader(version.name, release.releasetag, version.processname)
    self.logger.info('done')

    tasks = [
        ('modules', 'getModules'),
        ('paths', 'getPaths'),
        ('sequences', 'getSequences'),
        ('datasets', 'getDatasetsPaths'),
        ('psets', 'getGlobalPsets'),
        ('streams', 'getStreams'),
        ('source', 'getSource'),
        ('esproducers', 'getESModules'),
        ('essources', 'getESSources'),
        ('services', 'getServices'),
        ('outputmodules', 'getOutputModules'),
        ('endpaths', 'getEndPaths'),
        ('schedule', 'getSchedule'),
    ]

    # build each part of the configuration
    parts = {}
    import task
    if self.threads == 1:
        # single-threaded version (map is lazy, like the old itertools.imap)
        task.initialize(online, version, use_cherrypy)
        task_iterator = map(task.worker, tasks)
    else:
        # multiprocess version
        pool = multiprocessing.Pool(processes=self.threads,
                                    initializer=task.initialize,
                                    initargs=(online, version, use_cherrypy))
        task_iterator = pool.imap_unordered(task.worker, tasks)

    for (key, value, error) in task_iterator:
        parts[key] = value
        if error:
            self.logger.critical(error)
            return None

    # clean up the task pool
    if self.threads != 1:
        pool.close()
        pool.join()

    # combine all parts
    data = (header +
            parts['psets'] +
            parts['streams'] +
            parts['datasets'] +
            parts['source'] +
            parts['essources'] +
            parts['esproducers'] +
            parts['services'] +
            parts['modules'] +
            parts['outputmodules'] +
            parts['sequences'] +
            parts['paths'] +
            parts['endpaths'] +
            parts['schedule'])

    filename = folder + '/HLT.py'
    try:
        with open(filename, 'w') as out:
            out.write(data)
        return filename
    except Exception as e:
        msg = 'ERROR: Writing to file %s: %s' % (filename, e.args[0])
        self.logger.error(msg)
        return None
from data_builder import DataBuilder
import pandas as pd

if __name__ == '__main__':
    dataBuilder = DataBuilder()
    lastUpdated = dataBuilder.getLastUpdated()
    print("Database was last updated "
          + str(lastUpdated[0]) + "." + str(lastUpdated[1]) + "." + str(lastUpdated[2]))
    answer = input("Type 'Y' to update database: ")
    if answer == "Y":
        dataBuilder.buildDatabaseFromDate(lastUpdated[0], lastUpdated[1], lastUpdated[2])
        dataBuilder.updateLastUpdated()
        print("Database updated!")
from flask import Flask, request
from data_builder import DataBuilder

app = Flask(__name__)
# create the builder at module level so the routes also work when the app
# is served by a WSGI server rather than run directly
d = DataBuilder()


# ROUTES FOR DATA
@app.route('/trackerdata', methods=['POST'])
def tracker_data():
    return d.tracker_data()


@app.route('/regiondata', methods=['POST'])
def region_data():
    region = request.args.get('region')
    return d.region_data(region)


@app.route('/totaldata', methods=['POST'])
def total_data():
    return d.total_data()


# ROUTES FOR NEWS
@app.route('/news', methods=['POST'])
def news():
    pass


if __name__ == '__main__':
    app.run()
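
# Hypothetical client calls (not in the original source; assumes Flask's
# default host/port, and 'Oslo' is an example region value):
#   curl -X POST http://127.0.0.1:5000/trackerdata
#   curl -X POST 'http://127.0.0.1:5000/regiondata?region=Oslo'
#   curl -X POST http://127.0.0.1:5000/totaldata
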
class RNNWrapper:
    def __init__(self, trading_data):
        self.config = trading_data.config
        self.company = trading_data.company
        data_set = pd.read_csv(trading_data.csv_dataset)
        self.data_builder = DataBuilder(data_set,
                                        trading_data.entry_columns,
                                        trading_data.prediction_column)
        self.data_builder.build_data()

    def build(self):
        training_entry_set = self.data_builder.training_entry_set
        input_shape = (training_entry_set.shape[1], training_entry_set.shape[2])
        self.sequential = Sequential()
        # stack LSTM layers; only the last layer collapses the sequence
        for i in range(input_shape[1]):
            last_iteration = i == input_shape[1] - 1
            self.sequential.add(LSTM(units=self.config.layer_units,
                                     recurrent_activation=self.config.activation_function,
                                     return_sequences=not last_iteration,
                                     input_shape=input_shape))
        self.sequential.add(Dense(units=1))
        optimizer = Adam(learning_rate=self.config.learning_rate,
                         beta_1=self.config.beta_1,
                         beta_2=self.config.beta_2)
        self.sequential.compile(optimizer=optimizer, loss=self.config.loss)

    def fit(self):
        entry_set = self.data_builder.training_entry_set
        result_set = self.data_builder.training_result_set
        rlrop = ReduceLROnPlateau(monitor=self.config.monitor,
                                  factor=self.config.factor,
                                  patience=self.config.patiente)
        with tf.device(self.config.device):
            self.sequential.fit(entry_set, result_set,
                                epochs=self.config.epochs,
                                batch_size=self.config.batch_size,
                                callbacks=[rlrop])

    def predict(self):
        entry_set = self.data_builder.test_entry_set
        scaler_prediction = self.sequential.predict(entry_set)
        self.predicted_results = self.data_builder.inverse_transform(scaler_prediction)

    def load_real_results(self):
        self.real_results = self.data_builder.real_test_results

    def to_image(self):
        plt.clf()
        plt.plot(self.real_results, color='red', label='Real Results')
        plt.plot(self.predicted_results, color='blue', label='Predicted Results')
        plt.title(self.company + ' Prediction')
        plt.xlabel('Time')
        plt.ylabel(self.company + ' Price')
        plt.legend()
        return plt

    def save(self, path):
        self.sequential.save(path)
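
# Hypothetical usage sketch (not in the original source): assumes a
# trading_data object exposing config, company, csv_dataset, entry_columns
# and prediction_column, as the constructor expects.
def run_wrapper(trading_data):
    wrapper = RNNWrapper(trading_data)
    wrapper.build()
    wrapper.fit()
    wrapper.predict()
    wrapper.load_real_results()
    # to_image() returns the pyplot module, so it can be saved directly
    wrapper.to_image().savefig(trading_data.company + '_prediction.png')
    wrapper.save(trading_data.company + '.h5')
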
def createConfig(self, ver, cnf, db, online, folder, request=None,
                 use_cherrypy=False):
    if use_cherrypy:
        import cherrypy
        self.logger = cherrypy.log
        cache_session = request.db_cache
        src = 1 if online in ('True', 'true') else 0
        if cnf != -2 and cnf != -1:
            # `cache` and `log` are assumed to be provided by the
            # surrounding module
            cnf = cache.folMappingDictGetExternal(cnf, src, "cnf",
                                                  cache_session, log)

        # implement part of the logging interface on the cherrypy logger;
        # the helper is named _log so it does not shadow the module-level
        # `log` used in the cache call above
        def _log(self, lvl, msg, *args, **kwargs):
            if args:
                msg = msg % args
            traceback = bool(kwargs.get('exc_info'))
            self.error(msg, '', lvl, traceback)

        def debug(self, msg, *args, **kwargs):
            self.log(logging.DEBUG, msg, *args, **kwargs)

        def info(self, msg, *args, **kwargs):
            self.log(logging.INFO, msg, *args, **kwargs)

        def warning(self, msg, *args, **kwargs):
            self.log(logging.WARNING, msg, *args, **kwargs)

        def critical(self, msg, *args, **kwargs):
            self.log(logging.CRITICAL, msg, *args, **kwargs)

        import types
        self.logger.log = types.MethodType(_log, self.logger)
        self.logger.debug = types.MethodType(debug, self.logger)
        self.logger.info = types.MethodType(info, self.logger)
        self.logger.warning = types.MethodType(warning, self.logger)
        self.logger.critical = types.MethodType(critical, self.logger)
    else:
        formatter = logging.Formatter(
            "[%(asctime)s - %(levelname)s/%(processName)s] %(name)s: %(message)s")
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        self.logger = multiprocessing.get_logger()
        self.logger.setLevel(logging.INFO)
        self.logger.addHandler(handler)

    version = None
    release = None
    try:
        version = DataBuilder.getRequestedVersion(ver, cnf, db, self.logger)
    except Exception as e:
        msg = 'ERROR: Query getRequestedVersion Error: ' + e.args[0]
        self.logger.error(msg)
    if version is None:
        print("\nCould Not Find The Requested Version.\n")
        return

    self.data_builder = DataBuilder(db, version, self.logger)
    try:
        release = self.queries.getRelease(version.id_release, db, self.logger)
    except Exception as e:
        msg = 'ERROR: Query getRelease Error: ' + e.args[0]
        self.logger.error(msg)

    # Adding File's Headers
    self.logger.info('retrieving header...')
    header = self.writeHeader(version.name, release.releasetag, version.processname)
    self.logger.info('done')

    tasks = [
        ('modules', 'getModules'),
        ('paths', 'getPaths'),
        ('sequences', 'getSequences'),
        ('datasets', 'getDatasetsPaths'),
        ('psets', 'getGlobalPsets'),
        ('streams', 'getStreams'),
        ('source', 'getSource'),
        ('esproducers', 'getESModules'),
        ('essources', 'getESSources'),
        ('services', 'getServices'),
        ('outputmodules', 'getOutputModules'),
        ('endpaths', 'getEndPaths'),
        ('schedule', 'getSchedule'),
    ]

    # build each part of the configuration
    parts = {}
    import task
    if self.threads == 1:
        # single-threaded version (map is lazy, like the old itertools.imap)
        task.initialize(online, version, use_cherrypy)
        task_iterator = map(task.worker, tasks)
    else:
        # multiprocess version
        pool = multiprocessing.Pool(processes=self.threads,
                                    initializer=task.initialize,
                                    initargs=(online, version, use_cherrypy))
        task_iterator = pool.imap_unordered(task.worker, tasks)

    for (key, value, error) in task_iterator:
        parts[key] = value
        if error:
            self.logger.critical(error)
            return None

    # clean up the task pool
    if self.threads != 1:
        pool.close()
        pool.join()

    # combine all parts
    data = (header +
            parts['psets'] +
            parts['streams'] +
            parts['datasets'] +
            parts['source'] +
            parts['essources'] +
            parts['esproducers'] +
            parts['services'] +
            parts['modules'] +
            parts['outputmodules'] +
            parts['sequences'] +
            parts['paths'] +
            parts['endpaths'] +
            parts['schedule'])

    filename = folder + '/HLT.py'
    try:
        with open(filename, 'w') as out:
            out.write(data)
        return filename
    except Exception as e:
        msg = 'ERROR: Writing to file %s: %s' % (filename, e.args[0])
        self.logger.error(msg)
        return None
import os
import sys
sys.path.append("../lib")

import utils
from config import Config
from downloader import Downloader
from data_builder import DataBuilder

config = Config("/usr/dev/speech-separation/config.yaml")
#downloader = Downloader(config)
data_builder = DataBuilder(config, config.data.num_workers * 2)

#utils.make_dirs("../data")
#utils.make_dirs("../data/audio")
#utils.make_dirs("../data/frames")

# Download data
#downloader.download_data("../data/csv/avspeech_train.csv", 0, 55000, wait_tasks=False)
#downloader.download_data("../data/csv/avspeech_test.csv", 0, 1000)
#downloader.download_noise_data("../data/csv/noise.csv", 0, 1000)

# Build data
#data_builder.build_embs(wait_tasks=False)
#data_builder.build_embs(is_train=False)
data_builder.build_audio(38000, wait_tasks=False)
data_builder.build_audio(1000, is_train=False)
# imports inferred from usage in this snippet
import os
import pathlib

import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

from data_builder import DataBuilder

ENTRY_COLUMNS = ['open', 'close']


def predict(rnn, data_builder):
    data_builder.build_data()
    entry_set = data_builder.test_entry_set
    scaler_prediction = rnn.predict(entry_set)
    return data_builder.inverse_transform(scaler_prediction)


def to_image(predicted_results, real_results, operation, company):
    plt.plot(real_results, color='red', label='Real Results')
    plt.plot(predicted_results, color='blue', label='Predicted Results')
    plt.title(company + ' Test Prediction')
    plt.xlabel('Time')
    plt.ylabel(company + ' Price')
    plt.legend()
    plt.savefig(OUTPUT_DATA + '/' + operation + '.png')


# MODEL_FILE, DATA_SET and OUTPUT_DATA are expected to be defined by the
# surrounding script (e.g. parsed from the command line)
operation = MODEL_FILE.split('/')[-1].split('.')[0]
company = DATA_SET.split('/')[-1].split('.')[0]

if not os.path.isdir(OUTPUT_DATA):
    pathlib.Path(OUTPUT_DATA).mkdir(parents=True, exist_ok=True)

rnn = tf.keras.models.load_model(MODEL_FILE)
csv_dataset = pd.read_csv(DATA_SET)
data_builder = DataBuilder(csv_dataset, ENTRY_COLUMNS, operation)
predicted_results = predict(rnn, data_builder)
to_image(predicted_results, data_builder.real_test_results, operation, company)