Example #1
def test_dataspace_config_finds_bad():
    with pytest.raises(dataspace.DataSpaceConfigurationError) as e:
        dataspace.DataSpace({})
    assert e.match('missing dataspace information')

    with pytest.raises(dataspace.DataSpaceConfigurationError) as e:
        dataspace.DataSpace({'dataspace': 'asdf'})
    assert e.match('dataspace key must correspond to a dictionary')

    with pytest.raises(dataspace.DataSpaceConfigurationError) as e:
        dataspace.DataSpace({'dataspace': {'asdf': 'asdf'}})
    assert e.match('Invalid dataspace configuration')
Example #2
def test_dataspace_config_finds_bad():
    with pytest.raises(ds.DataSpaceConfigurationError) as e:
        ds.DataSpace({})
    assert e.match("missing dataspace information")

    with pytest.raises(ds.DataSpaceConfigurationError) as e:
        ds.DataSpace({"dataspace": "asdf"})
    assert e.match("dataspace key must correspond to a dictionary")

    with pytest.raises(ds.DataSpaceConfigurationError) as e:
        ds.DataSpace({"dataspace": {"asdf": "asdf"}})
    assert e.match("Invalid dataspace configuration")
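The dictionary shape these validators accept can be inferred from Examples #6, #14, and #16: a "dataspace" key mapping to a dictionary that carries a "datasource" section. A minimal sketch, with placeholder connection values:

config = {
    "dataspace": {
        "datasource": {
            "module": "decisionengine.framework.dataspace.datasources.sqlalchemy_ds",
            "name": "SQLAlchemyDS",
            "config": {"url": "postgresql://localhost/decisionengine"},  # placeholder URL
        }
    }
}
ds.DataSpace(config)  # a configuration of this shape should not raise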
Example #3
def main():
    """
    Call this as a test unit or use as the CLI of this module
    """
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('--configtemplate',
                        action='store_true',
                        help='prints the expected module configuration')

    parser.add_argument(
        '--configinfo',
        action='store_true',
        help='prints config template along with produces and consumes info')
    args = parser.parse_args()
    if args.configtemplate:
        module_config_template()
    elif args.configinfo:
        module_config_info()
    else:
        config_manager = configmanager.ConfigManager()
        config_manager.load()
        global_config = config_manager.get_global_config()
        print("GLOBAL CONF", global_config)
        ds = dataspace.DataSpace(global_config)
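A minimal sketch of how such a module is typically driven from the command line; the module guard is an assumption, since the snippets show only the function body:

if __name__ == "__main__":
    main()
    # e.g. `python the_module.py --configtemplate` would print the expected
    # module configuration; `--configinfo` adds produces/consumes info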
Example #4
 def __init__(self, name, generation_id, channel_dict, global_config):
     """
     :type name: :obj:`str`
     :arg name: Name of channel corresponding to this task manager
     :type generation_id: :obj:`int`
     :arg generation_id: Task Manager generation id provided by caller
     :type channel_dict: :obj:`dict`
     :arg channel_dict: channel configuration
     :type global_config: :obj:`dict`
     :arg global_config: global configuration
      """
     self.id = str(uuid.uuid4()).upper()
     self.dataspace = dataspace.DataSpace(global_config)
     self.data_block_t0 = datablock.DataBlock(
         self.dataspace, name, self.id,
         generation_id)  # my current data block
     self.name = name
     self.channel = Channel(channel_dict)
     self.state = ProcessingState()
     self.loglevel = multiprocessing.Value('i', logging.WARNING)
     self.lock = threading.Lock()
     # The rest of this function will go away once the source-proxy
     # has been reimplemented.
     for src_worker in self.channel.sources.values():
         src_worker.worker.post_create(global_config)
Example #5
    def __init__(self, global_config, channel_config_loader, server_address):
        xmlrpc.server.SimpleXMLRPCServer.__init__(
            self,
            server_address,
            logRequests=False,
            requestHandler=RequestHandler)
        signal.signal(signal.SIGHUP, self.handle_sighup)
        self.channel_config_loader = channel_config_loader
        self.global_config = global_config
        self.dataspace = dataspace.DataSpace(self.global_config)
        self.reaper = Reaper(self.global_config)
        self.startup_complete = Event()
        self.logger = structlog.getLogger(LOGGERNAME)
        self.logger = self.logger.bind(module=__name__.split(".")[-1],
                                       channel=DELOGGER_CHANNEL_NAME)
        self.logger.debug(f"DecisionEngine starting on {server_address}")

        exchange_name = self.global_config.get("exchange_name",
                                               "hepcloud_topic_exchange")
        self.logger.debug(f"Creating topic exchange {exchange_name}")
        self.exchange = Exchange(exchange_name, "topic")
        self.broker_url = self.global_config.get("broker_url",
                                                 "redis://localhost:6379/0")
        _verify_redis_server(self.broker_url)

        self.source_workers = SourceWorkers(self.exchange, self.broker_url,
                                            self.logger)
        self.channel_workers = ChannelWorkers()

        self.register_function(self.rpc_metrics, name="metrics")

        self.logger.info(
            f"DecisionEngine __init__ complete {server_address} with {self.broker_url}"
        )
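The Exchange used here (and the Queue and Connection objects in Example #20) matches the API of the kombu messaging library; the import below is an assumption, as none of the snippets shows it:

from kombu import Exchange  # assumed import

exchange = Exchange("hepcloud_topic_exchange", "topic")  # a topic exchange, as above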
Example #6
def dataspace(request):
    """
    This parameterized fixture will set up various datasources.
    Add datasource objects to DATASOURCES_TO_TEST once they've got
    our basic schema loaded.  And adjust our `if` statements here
    until we are SQLAlchemy only.

    Pytest should take it from there and automatically run it
    through all the tests using this fixture.
    """
    conn_fixture = request.getfixturevalue(request.param)

    db_info = {}
    try:
        # SQL Alchemy
        db_info["url"] = conn_fixture["url"]
        db_info["echo"] = True  # put into extra chatty mode for tests
    except TypeError:
        try:
            # psycopg2
            db_info["host"] = conn_fixture.info.host
            db_info["port"] = conn_fixture.info.port
            db_info["user"] = conn_fixture.info.user
            db_info["password"] = conn_fixture.info.password
            db_info["database"] = conn_fixture.info.dbname
        except AttributeError:
            # psycopg2cffi
            for element in conn_fixture.dsn.split():
                (key, value) = element.split("=")
                if value != "''" and value != '""':
                    db_info[key] = value

    config = {}
    config["dataspace"] = {}
    config["dataspace"]["datasource"] = {}
    config["dataspace"]["datasource"]["config"] = db_info

    if request.param == "PG_DE_DB_WITH_SCHEMA":
        config["dataspace"]["datasource"]["module"] = "decisionengine.framework.dataspace.datasources.postgresql"
        config["dataspace"]["datasource"]["name"] = "Postgresql"
    elif "SQLALCHEMY" in request.param:
        config["dataspace"]["datasource"]["module"] = "decisionengine.framework.dataspace.datasources.sqlalchemy_ds"
        config["dataspace"]["datasource"]["name"] = "SQLAlchemyDS"

    my_ds = ds.DataSpace(config)
    load_sample_data_into_datasource(my_ds)

    yield my_ds

    del my_ds
    gc.collect()
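A hedged sketch of how this fixture is likely parameterized, based on its docstring; the decorator and the second entry in DATASOURCES_TO_TEST are assumptions (only "PG_DE_DB_WITH_SCHEMA" and the "SQLALCHEMY" check appear above):

DATASOURCES_TO_TEST = ["PG_DE_DB_WITH_SCHEMA", "PG_DE_DB_WITH_SCHEMA_SQLALCHEMY"]  # second name hypothetical

@pytest.fixture(params=DATASOURCES_TO_TEST)
def dataspace(request):
    ...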
Example #7
    def start_channel(self, channel_name, channel_config):
        channel_config = copy.deepcopy(channel_config)
        with START_CHANNEL_HISTOGRAM.labels(channel_name).time():
            # NB: Possibly override channel name
            channel_name = channel_config.get("channel_name", channel_name)
            source_configs = channel_config.pop("sources")
            src_workers = self.source_workers.update(channel_name,
                                                     source_configs)
            module_workers = validated_workflow(channel_name, src_workers,
                                                channel_config, self.logger)

            queue_info = [(worker.queue.name, worker.key)
                          for worker in src_workers.values()]
            self.logger.debug(f"Building TaskManager for {channel_name}")
            task_manager = TaskManager.TaskManager(
                channel_name,
                module_workers,
                dataspace.DataSpace(self.global_config),
                source_products(src_workers),
                self.exchange,
                self.broker_url,
                queue_info,
            )
            self.logger.debug(f"Building Worker for {channel_name}")
            worker = ChannelWorker(task_manager, self.global_config["logger"])
            WORKERS_COUNT.inc()
            with self.channel_workers.access() as workers:
                workers[channel_name] = worker

            # The channel must be started first so it can listen for the messages from the sources.
            self.logger.debug(f"Trying to start {channel_name}")
            worker.start()
            self.logger.info(f"Channel {channel_name} started")

            worker.wait_while(ProcessingState.State.BOOT)

            # Start any sources that are not yet alive.
            for key, src_worker in src_workers.items():
                if src_worker.is_alive():
                    continue
                if src_worker.exitcode == 0:  # pragma: no cover
                    # This can happen if the source's acquire method runs only once (e.g. when testing)
                    # and the first process completes before the next channel can use it.
                    raise RuntimeError(
                        f"The {key} source has already completed and cannot be used by channel {channel_name}."
                    )

                src_worker.start()
                self.logger.debug(
                    f"Started process {src_worker.pid} for source {key}")

            worker.wait_while(ProcessingState.State.ACTIVE)
Example #8
    def __init__(self, cfg, server_address, RequestHandlerClass):
        SimpleXMLRPCServer.SimpleXMLRPCServer.__init__(
            self,
            server_address,
            logRequests=False,
            requestHandler=RequestHandlerClass)

        self.logger = logging.getLogger("decision_engine")
        signal.signal(signal.SIGHUP, self.handle_sighup)
        self.task_managers = {}
        self.config_manager = cfg
        self.dataspace = dataspace.DataSpace(
            self.config_manager.get_global_config())
Example #9
 def __init__(self, *args, **kwargs):
     if not set(must_have).issubset(set(args[0].keys())):
         raise RuntimeError(
             'SourceProxy misconfigured. Must have %s defined' %
             (must_have, ))
     self.source_channel = args[0]['channel_name']
     self.data_keys = args[0]['Dataproducts']
     self.retries = args[0].get('retries', RETRIES)
     self.retry_to = args[0].get('retry_timeout', RETRY_TO)
     self.logger = de_logger.get_logger()
     config_manager = configmanager.ConfigManager()
     config_manager.load()
     global_config = config_manager.get_global_config()
     self.dataspace = dataspace.DataSpace(global_config)
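A hedged sketch of the dictionary this constructor expects as args[0], inferred from the reads above; must_have presumably lists the two required keys, and all values are placeholders:

source_proxy_config = {
    'channel_name': 'source_channel_name',  # required: channel to proxy
    'Dataproducts': ['key1', 'key2'],       # required: data products to fetch
    'retries': 3,                           # optional: defaults to RETRIES
    'retry_timeout': 20,                    # optional: defaults to RETRY_TO
}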
Example #10
    def __init__(self, global_config, channel_config_loader, server_address):
        xmlrpc.server.SimpleXMLRPCServer.__init__(self,
                                                  server_address,
                                                  logRequests=False,
                                                  requestHandler=RequestHandler)

        self.logger = logging.getLogger("decision_engine")
        signal.signal(signal.SIGHUP, self.handle_sighup)
        self.workers = Workers()
        self.channel_config_loader = channel_config_loader
        self.global_config = global_config
        self.dataspace = dataspace.DataSpace(self.global_config)
        self.reaper = dataspace.Reaper(self.global_config)
        self.logger.info("DecisionEngine started on {}".format(server_address))
Example #11
def main():
    """
    Call this as a test unit or use as the CLI of this module
    """
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('--configtemplate',
                        action='store_true',
                        help='prints the expected module configuration')

    parser.add_argument(
        '--configinfo',
        action='store_true',
        help='prints config template along with produces and consumes info')
    args = parser.parse_args()
    if args.configtemplate:
        module_config_template()
    elif args.configinfo:
        module_config_info()
    else:
        config_manager = configmanager.ConfigManager()
        config_manager.load()
        global_config = config_manager.get_global_config()
        print("GLOBAL CONF", global_config)
        ds = dataspace.DataSpace(global_config)

        data_block = datablock.DataBlock(
            ds,
            # '5CC840DD-88B9-45CE-9DA2-FF531289AC66',
            'C56E0AAF-99D3-42A8-88A3-921E30C1879C',
            1)

        fm_info = AWSFOMPublisher({
            "publish_to_graphite": True,
            "graphite_host": "fifemondata.fnal.gov",
            "graphite_port": 2104,
            "graphite_context": "hepcloud.aws",
            "output_file": "%s/de_data/AWS_figure_of_merit.csv" % (os.environ.get('HOME'),)
        })
        rc = fm_info.publish(data_block)
Example #12
    def __init__(self, global_config, channel_config_loader, server_address):
        xmlrpc.server.SimpleXMLRPCServer.__init__(
            self,
            server_address,
            logRequests=False,
            requestHandler=RequestHandler)

        signal.signal(signal.SIGHUP, self.handle_sighup)
        self.workers = Workers()
        self.channel_config_loader = channel_config_loader
        self.global_config = global_config
        self.dataspace = dataspace.DataSpace(self.global_config)
        self.reaper = Reaper(self.global_config)
        self.startup_complete = Event()
        self.logger = structlog.getLogger(LOGGERNAME)
        self.logger = self.logger.bind(module=__name__.split(".")[-1],
                                       channel=DELOGGER_CHANNEL_NAME)
        self.logger.info(f"DecisionEngine started on {server_address}")
Example #13
    def __init__(self, global_config, channel_config_loader, server_address):
        xmlrpc.server.SimpleXMLRPCServer.__init__(
            self, server_address, logRequests=False, requestHandler=RequestHandler
        )
        signal.signal(signal.SIGHUP, self.handle_sighup)
        self.source_workers = {}
        self.channel_workers = Workers()
        self.channel_config_loader = channel_config_loader
        self.global_config = global_config
        self.dataspace = dataspace.DataSpace(self.global_config)
        self.reaper = Reaper(self.global_config)
        self.startup_complete = Event()
        self.logger = structlog.getLogger(LOGGERNAME)
        self.logger = self.logger.bind(module=__name__.split(".")[-1], channel=DELOGGER_CHANNEL_NAME)
        self.logger.info(f"DecisionEngine started on {server_address}")
        self.register_function(self.rpc_metrics, name="metrics")
        if not global_config.get("no_webserver"):
            self.start_webserver()

        self.broker_url = self.global_config.get("broker_url", "redis://localhost:6379/0")
        _verify_redis_server(self.broker_url)
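The implementation of _verify_redis_server is not shown in these snippets; a plausible sketch of such a check using the redis-py client (the import and body are assumptions):

import redis  # assumed dependency

def verify_redis_server(broker_url):
    # ping() raises redis.ConnectionError if no server answers at broker_url
    redis.Redis.from_url(broker_url).ping()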
Example #14
def dataspace(request):
    """
    This parameterized fixture will set up various datasources.
    Add datasource objects to DATASOURCES_TO_TEST once they've got
    our basic schema loaded.  And adjust our `if` statements here
    until we are SQLAlchemy only.

    Pytest should take it from there and automatically run it
    through all the tests using this fixture.
    """
    conn_fixture = request.getfixturevalue(request.param)

    db_info = {}
    try:
        # SQL Alchemy
        db_info["url"] = conn_fixture["url"]
        db_info["echo"] = True  # put into extra chatty mode for tests
    except TypeError:
        # psycopg2
        db_info["host"] = conn_fixture.info.host
        db_info["port"] = conn_fixture.info.port
        db_info["user"] = conn_fixture.info.user
        db_info["password"] = conn_fixture.info.password
        db_info["database"] = conn_fixture.info.dbname

    config = {}
    config["dataspace"] = {}
    config["dataspace"]["datasource"] = {}
    config["dataspace"]["datasource"]["config"] = db_info

    config["dataspace"]["datasource"]["module"] = "decisionengine.framework.dataspace.datasources.sqlalchemy_ds"
    config["dataspace"]["datasource"]["name"] = "SQLAlchemyDS"

    my_ds = ds.DataSpace(config)
    load_sample_data_into_datasource(my_ds)

    yield my_ds

    del my_ds
    gc.collect()
Example #15
 def __init__(self, name, task_manager_id, generation_id, channel_dict,
              global_config):
     """
     :type task_manager_id: :obj:`int`
     :arg task_manager_id: Task Manager id provided by caller
     :type channel_dict: :obj:`dict`
     :arg channel_dict: channel configuration
     :type data_block: :obj:`~datablock.DataBlock`
     :arg data_block: data block
     """
     self.dataspace = dataspace.DataSpace(global_config)
     self.data_block_t0 = datablock.DataBlock(
         self.dataspace, name, task_manager_id,
         generation_id)  # my current data block
     self.name = name
     self.id = task_manager_id
     self.channel = Channel(channel_dict)
     self.state = multiprocessing.Value('i', BOOT)
     self.decision_cycle_active = False
     self.lock = threading.Lock()
     self.logger = de_logger.get_logger()
     self.stop = False  # stop running all loops when this is True
Example #16
def dspace(datasource):
    global_config = {
        'dataspace': {
            'reaper_start_delay_seconds': 1818,
            'retention_interval_in_days': 365,
            'datasource': {
                'module': 'decisionengine.framework.dataspace.datasources.postgresql',
                'name': 'Postgresql',
                'config': {
                    'user': datasource.info.dsn_parameters["user"],
                    'blocking': True,
                    'host': datasource.info.dsn_parameters["host"],
                    'port': datasource.info.dsn_parameters["port"],
                    'database': datasource.info.dsn_parameters["dbname"],
                    'maxconnections': 100,
                    'maxcached': 10,
                },
            },
        }
    }
    return dataspace.DataSpace(global_config)
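A hedged usage sketch pairing this fixture with the DataBlock construction from Example #4; the import path, channel name, and generation id are placeholders (DataBlock signatures vary across these examples):

import uuid
from decisionengine.framework.dataspace import datablock  # assumed import path

def make_block(dspace):
    block_id = str(uuid.uuid4()).upper()
    return datablock.DataBlock(dspace, 'my_channel', block_id, 1)  # placeholder values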
Example #17
def main():
    """
    Call this as a test unit or use as the CLI of this module
    """
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('--configtemplate',
                        action='store_true',
                        help='prints the expected module configuration')

    parser.add_argument(
        '--configinfo',
        action='store_true',
        help='prints config template along with produces and consumes info')
    args = parser.parse_args()
    if args.configtemplate:
        module_config_template()
    elif args.configinfo:
        module_config_info()
    else:
        config_manager = configmanager.ConfigManager()
        config_manager.load()
        global_config = config_manager.get_global_config()
        print("GLOBAL CONF", global_config)
        ds = dataspace.DataSpace(global_config)

        #data_block = datablock.DataBlock(ds,
        #                                 '6D596F43-B4DB-4418-812A-79869001E72B',
        #                                 1)
        data_block = datablock.DataBlock(
            ds, "AWS_Calculations_with_source_proxy",
            "F70B4110-E66D-49CA-9333-4A983A679F37", 1, 109)

        fm_info = FigureOfMerit()
        rc = fm_info.transform(data_block)
        print("INFO")
        print(rc)
Example #18
    my_logger = logging.getLogger('decision_engine')
    my_logger.info('Starting decision engine')

    if len(sys.argv) > 1:
        channel_name = sys.argv[1]
        channel_conf = os.path.join(config_manager.channel_config_dir,
                                    channel_name)
        with open(os.path.abspath(channel_conf), 'r') as f:
            channels = {}
            channel_name = channel_name.split('.')[0]
            # The channel config file holds a Python expression; exec binds
            # its value into the channels dict under the channel's base name.
            code = 'channels[channel_name]=' + ''.join(f.readlines())
            exec(code)
    else:
        channels = config_manager.get_channels()

    ds = dataspace.DataSpace(global_config)
    taskmanager_id = str(uuid.uuid4()).upper()
    generation_id = 1

    task_managers = {}
    data_space = {}
    """
    create channels
    """
    for ch in channels:
        task_managers[ch] = TaskManager(ch, taskmanager_id, generation_id,
                                        channels[ch], global_config)

    for key, value in task_managers.items():
        p = multiprocessing.Process(target=value.run,
                                    args=(),
Example #19
 def post_create(self, global_config):
     self.dataspace = dataspace.DataSpace(global_config)
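Examples #4 and #20 show where this hook is invoked: after a channel is constructed, it walks its source workers and hands each one the global configuration.

for src_worker in self.source_workers.values():
    src_worker.module_instance.post_create(global_config)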
Example #20
    def __init__(self,
                 name,
                 channel_dict,
                 global_config,
                 de_source_workers=None):
        """
        :type name: :obj:`str`
        :arg name: Name of channel corresponding to this task manager
        :type generation_id: :obj:`int`
        :arg generation_id: Task manager generation id provided by caller
        :type channel_dict: :obj:`dict`
        :arg channel_dict: channel configuration
        :type global_config: :obj:`dict`
        :arg global_config: global configuration
        """
        super().__init__(channel_dict.get("channel_name", name))

        self.id = str(uuid.uuid4()).upper()
        self.dataspace = dataspace.DataSpace(global_config)
        self.data_block_t0 = datablock.DataBlock(self.dataspace, name, self.id,
                                                 1)  # my current data block
        self.logger = structlog.getLogger(CHANNELLOGGERNAME)
        self.logger = self.logger.bind(module=__name__.split(".")[-1],
                                       channel=self.name)

        self.broker_url = global_config.get("broker_url",
                                            "redis://localhost:6379/0")
        self.logger.debug(f"Using data-broker URL: {self.broker_url}")

        self.logger.debug("Creating channel sources")
        self.source_workers = _make_workers_for(channel_dict["sources"],
                                                Source, self.name)
        if de_source_workers is not None:
            # Decision engine owns the sources
            de_source_workers[self.name] = self.source_workers

        self.logger.debug("Creating channel publishers")
        self.publisher_workers = _make_workers_for(channel_dict["publishers"],
                                                   Publisher, self.name)

        self.logger.debug("Creating channel logic engines")
        configured_le_s = channel_dict.get("logicengines")
        if configured_le_s is None:
            self.logger.debug(
                "No 'logicengines' configuration detected; will use default configuration, which unconditionally executes all configured publishers."
            )
            configured_le_s = passthrough_configuration(
                channel_dict["publishers"].keys())
        if len(configured_le_s) > 1:
            raise RuntimeError(
                "Cannot support more than one logic engine per channel.")

        self.logic_engine = None
        if configured_le_s:
            key, config = configured_le_s.popitem()
            self.logic_engine = Worker(key, config, LogicEngine, self.name)

        self.logger.debug("Creating channel transforms")
        transform_workers = _make_workers_for(channel_dict["transforms"],
                                              Transform, self.name)
        self.transform_workers = ensure_no_circularities(
            self.source_workers, transform_workers, self.publisher_workers)

        exchange_name = global_config.get("exchange_name",
                                          "hepcloud_topic_exchange")
        self.logger.debug(
            f"Creating topic exchange {exchange_name} for channel {self.name}")
        self.exchange = Exchange(exchange_name, "topic")
        self.connection = Connection(self.broker_url)

        expected_source_products = set()
        queues = {}
        for worker in self.source_workers.values():
            # FIXME: Just keeping track of instance names will not
            #        work whenever we have multiple source instances
            #        of the same source type.
            expected_source_products.update(
                worker.module_instance._produces.keys())
            self.logger.debug(
                f"Creating queue {worker.full_key} with routing key {worker.full_key}"
            )
            queues[worker.full_key] = Queue(
                worker.full_key,
                exchange=self.exchange,
                routing_key=worker.full_key,
                auto_delete=True,
            )
        self.expected_source_products = expected_source_products
        self.queues = queues

        # Caching to determine if all sources have run at least once.
        self.sources_have_run_once = False
        self.source_product_cache = {}

        # The rest of this function will go away once the source-proxy
        # has been reimplemented.
        for src_worker in self.source_workers.values():
            src_worker.module_instance.post_create(global_config)