def consume_data(self, data, passback, output_dir):
    # Train one ANN model per ticker, spreading the work across a process pool
    # sized from the execution options (-1 means "use every available core").
    open_threads = []
    _, max_processes, _ = config_parser_singleton.read_execution_options()
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        out_dir = output_dir + path.sep + "ann_models"
        if not path.exists(out_dir):
            os.mkdir(out_dir)
        for ticker, training_data in data.items():
            open_threads.append(pool.apply_async(handle_data, [ticker, training_data,
                                                               out_dir, self._overwrite_existing],
                                                 {'trend_lookahead': self._trend_lookahead,
                                                  'combined_examples': self._combined_examples_factor}))
        # .get() blocks until each job finishes and re-raises any worker exception;
        # tqdm reports progress as each ticker's model completes.
        for t in tqdm.tqdm(open_threads):
            t.get()
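All of the consume_data/predict_data examples in this listing share the same concurrency pattern: one apply_async job per ticker with a positional argument list and a keyword-argument dict, followed by a blocking .get() on each AsyncResult under a tqdm progress bar. Below is a minimal self-contained sketch of that pattern, using a stand-in worker in place of handle_data and assuming tqdm is installed; it is an illustration, not code from the project.

import multiprocessing

import tqdm


def stand_in_worker(ticker, value, scale=1):
    # Placeholder for the real per-ticker worker (handle_data etc.); it just
    # scales the value so the pool round trip is observable.
    return ticker, value * scale


if __name__ == "__main__":
    data = {'AAPL': 3, 'MSFT': 5}
    with multiprocessing.Pool(2) as pool:
        jobs = [pool.apply_async(stand_in_worker, [ticker, value], {'scale': 10})
                for ticker, value in data.items()]
        # .get() blocks until the job completes and re-raises any exception
        # thrown inside the worker process.
        for job in tqdm.tqdm(jobs):
            print(job.get())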
Example #2
def consume_data(self, data, passback, output_dir):
    # Train one SVM strength model per ticker using the same pool-based pattern.
    out_dir = output_dir + path.sep + 'svm_strength_models'
    if not path.exists(out_dir):
        os.mkdir(out_dir)
    _, max_processes, _ = config_parser_singleton.read_execution_options()
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        open_jobs = []
        for ticker, training_data in data.items():
            job = pool.apply_async(
                handle_data,
                [ticker, training_data, out_dir, self._overwrite_existing],
                {'combined_examples': self._combined_examples_factor})
            open_jobs.append(job)
        for job in tqdm.tqdm(open_jobs):
            job.get()
Example #3
def consume_data(self, data, passback, output_dir):
    # Train one random forest model per ticker; handle_model_creation does the
    # per-ticker work in a pool process.
    out_dir = output_dir + path.sep + 'random_forest_models'
    if not path.exists(out_dir):
        os.mkdir(out_dir)
    _, max_processes, _ = config_parser_singleton.read_execution_options()
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        tasks = []
        for ticker, training_data in data.items():
            tasks.append(
                pool.apply_async(
                    handle_model_creation,
                    [ticker, training_data, out_dir, self._overwrite_existing],
                    {"combined_examples": self._periods_per_example}))
        for task in tqdm.tqdm(tasks):
            task.get()
Example #4
def predict_data(self, data, passback, in_model_dir):
    # Run a per-ticker similarity analysis in a process pool; out_dir is handed
    # to each worker and nothing is returned from this method.
    out_dir = in_model_dir + path.sep + 'similarity_analysis'
    if not path.exists(out_dir):
        os.mkdir(out_dir)
    _, max_processes, _ = config_parser_singleton.read_execution_options()
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        open_jobs = []
        for ticker, prediction_data in data.items():
            # The bare name predict_data here resolves to the module-level worker
            # function, not to this method.
            open_jobs.append(pool.apply_async(
                predict_data,
                [ticker, out_dir, prediction_data],
                {'combined_examples': self._combined_examples_factor,
                 'num_similar_regions': self._num_similar_regions}))
        for job in open_jobs:
            job.get()
Example #5
def predict_data(self, data, passback, in_model_dir):
    # Predict per ticker using the previously trained ANN models and collect a
    # {ticker: (prediction, accuracy)} mapping.
    model_dir = in_model_dir + path.sep + 'ann_models'
    if not path.exists(model_dir):
        raise FileNotFoundError("Model storage directory for ANN prediction does not exist. Please run model "
                                "creation without the prediction flag set to true to create models used in "
                                "prediction.")
    predictions = {}
    _, max_processes, _ = config_parser_singleton.read_execution_options()
    max_processes = multiprocessing.cpu_count() if max_processes == -1 else max_processes
    with multiprocessing.Pool(max_processes) as pool:
        working_threads = []
        for ticker, prediction_data in data.items():
            working_threads.append(pool.apply_async(predict_data,
                                                    [ticker, model_dir, prediction_data],
                                                    {'trend_lookahead': self._trend_lookahead,
                                                     'combined_examples': self._combined_examples_factor}))
        for worker in tqdm.tqdm(working_threads):
            result = worker.get()
            # Workers may return None; only successful results are recorded.
            if result is not None:
                ticker, actual_prediction, accuracy = result
                predictions[ticker] = (actual_prediction, accuracy)
    return predictions
Example #6
    def pass_data(self, output_dir, stop_for_errors=False, print_errors=True):
        provider = None
        consumer = None
        columns = None
        predict, _, export_data = read_execution_options()
        ret_predictions = {}
        for provKey, provider in self.providers.items():
            try:
                if provKey not in self.consumers.keys():
                    continue
                registeredConsumers = self.consumers[provKey]
                for consumer_set in registeredConsumers:
                    consumer = None
                    args = None
                    passback = None
                    keyword_args = {}
                    # A registration tuple carries one to four elements:
                    # consumer, provider args, passback, and provider kwargs.
                    if len(consumer_set) == 1:
                        consumer = consumer_set[0]
                    elif len(consumer_set) == 2:
                        consumer, args = consumer_set
                    elif len(consumer_set) == 3:
                        consumer, args, passback = consumer_set
                    elif len(consumer_set) == 4:
                        consumer, args, passback, keyword_args = consumer_set
                    else:
                        raise ValueError(
                            "Invalid number of consumer registration arguments"
                        )

                    if not predict:
                        consumer.consume_data(
                            provider.generate_data(*args, **keyword_args),
                            passback, output_dir)
                    else:
                        predictions = consumer.predict_data(
                            provider.generate_prediction_data(
                                *args, **keyword_args), passback, output_dir)
                        consumer_passback_id = str(
                            type(consumer)) + str(passback)
                        if consumer_passback_id in self._prediction_string_serializers and not export_data:
                            predictions = self._prediction_string_serializers[
                                consumer_passback_id](predictions)
                        elif consumer_passback_id in self._data_exportation_functions and export_data:
                            self._data_exportation_functions[
                                consumer_passback_id](predictions, output_dir)
                        ret_predictions[consumer_passback_id] = predictions
            except Exception:
                if print_errors:
                    traceback.print_exc()
                    logger.logger.log(
                        logger.NON_FATAL_ERROR,
                        "Above error was encountered during processing "
                        "of the following provider/consumer pair")
                    logger.logger.log(
                        logger.NON_FATAL_ERROR,
                        "\t%s %s" % (type(provider), type(consumer)))
                    logger.logger.log(
                        logger.NON_FATAL_ERROR,
                        "With the following columns as a data argument")
                    logger.logger.log(logger.NON_FATAL_ERROR,
                                      "\t%s" % str(columns))
                if stop_for_errors:
                    return
        return ret_predictions
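For reference, the consumer_set tuples that pass_data unpacks can carry one to four elements. The sketch below illustrates those shapes only; the stub consumer class, ticker lists, and the 'window' keyword are placeholders, and the project's actual registration API is not shown here.

class StubConsumer:
    # Hypothetical consumer used only to illustrate the registration shapes.
    def consume_data(self, data, passback, output_dir):
        pass


consumer = StubConsumer()

# Shapes matched by the len() checks in pass_data above; omitted elements keep
# their defaults (args and passback stay None, keyword_args stays {}).
consumer_sets = [
    (consumer,),                                          # consumer only
    (consumer, ['AAPL', 'MSFT']),                         # + provider args
    (consumer, ['AAPL', 'MSFT'], 'ann'),                  # + passback
    (consumer, ['AAPL', 'MSFT'], 'ann', {'window': 20}),  # + provider kwargs
]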
Example #7
import importlib
import os

from data_providing_module import configurable_registry
from data_providing_module.data_provider_registry import registry
from general_utils.config.config_parser_singleton import parser, update_config, read_execution_options
from general_utils.exportation import csv_amalgamation
from general_utils.logging import logger

# os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

if __name__ == "__main__":
    import sys
    args = sys.argv[1:]
    # Import every provider and consumer module so that any registration side
    # effects run before data is passed through the registry.
    providers = os.listdir("data_providing_module/data_providers")
    for provider in providers:
        if provider.startswith('__'):
            continue
        importlib.import_module('data_providing_module.data_providers.' +
                                provider.replace('.py', ''))
    consumers = os.listdir("training_managers")
    for consumer in consumers:
        if consumer.startswith('__'):
            continue
        importlib.import_module("training_managers." +
                                consumer.replace('.py', ''))
    configurable_registry.config_registry.handle_configurables(parser)
    predict, max_processes, export_predictions = read_execution_options()
    update_config()
    # The first command line argument is treated as the output directory.
    ret_predictions = registry.pass_data(args[0], stop_for_errors=False)
    if predict and not export_predictions:
        for passback, predictions in ret_predictions.items():
            logger.logger.log(logger.OUTPUT, predictions)