def test_DataBlock_constructor(self):
    """Construct a DataBlock via each supported signature and verify generation_id."""
    tm_record = self.data["taskmanager"][0]
    expected_generation = self.data["dataproduct"][0]["generation_id"]

    # positional taskmanager_id
    block = datablock.DataBlock(self.dataspace, tm_record["name"], tm_record["taskmanager_id"])
    self.assertEqual(str(block.generation_id), expected_generation)

    # keyword generation_id
    block = datablock.DataBlock(self.dataspace, tm_record["name"], generation_id=expected_generation)
    self.assertEqual(str(block.generation_id), expected_generation)

    # keyword taskmanager_id plus explicit sequence_id
    block = datablock.DataBlock(
        self.dataspace,
        tm_record["name"],
        taskmanager_id=tm_record["taskmanager_id"],
        sequence_id=1,
    )
    self.assertEqual(str(block.generation_id), expected_generation)
def test_DataBlock_constructor(dataspace):  # noqa: F811
    """Exercise the three DataBlock construction signatures."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples

    # positional taskmanager_id
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    assert block.generation_id == 1

    # keyword generation_id
    block = datablock.DataBlock(dataspace, tm["name"], generation_id=1)
    assert block.generation_id == 1

    # positional taskmanager_id plus explicit sequence_id
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"], sequence_id=1)
    assert block.generation_id == 1
def __init__(self, name, generation_id, channel_dict, global_config):
    """
    Build a task manager for one channel: its dataspace connection, its
    initial (t0) data block, the channel workers, and shared state/lock
    primitives.

    :type name: :obj:`str`
    :arg name: Name of channel corresponding to this task manager
    :type generation_id: :obj:`int`
    :arg generation_id: Task Manager generation id provided by caller
    :type channel_dict: :obj:`dict`
    :arg channel_dict: channel configuration
    :type global_config: :obj:`dict`
    :arg global_config: global configuration
    """
    self.id = str(uuid.uuid4()).upper()  # unique id for this task-manager instance
    self.dataspace = dataspace.DataSpace(global_config)
    self.data_block_t0 = datablock.DataBlock(self.dataspace,
                                             name,
                                             self.id,
                                             generation_id)  # my current data block
    self.name = name
    self.channel = Channel(channel_dict)
    self.state = ProcessingState()
    # log level shared with child processes
    self.loglevel = multiprocessing.Value('i', logging.WARNING)
    self.lock = threading.Lock()
    # The rest of this function will go away once the source-proxy
    # has been reimplemented.
    for src_worker in self.channel.sources.values():
        src_worker.worker.post_create(global_config)
def test_DataBlock_key_management(dataspace):  # noqa: F811
    """Put/get round-trips with automatic and manual metadata, plus the
    ProductRetriever interface."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(tm["taskmanager_id"])
    metadata = datablock.Metadata(
        tm["taskmanager_id"],
        generation_id=dataspace.get_last_generation_id(tm["name"], tm["taskmanager_id"]),
    )
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])

    # automatic metadata and a plain string value
    block.put("example_test_key", "example_test_value", header)
    assert "example_test_key" in block.keys()
    assert "example_test_key" in block
    assert block.get("example_test_key") == "example_test_value"

    # product-retriever interface
    retriever = datablock.ProductRetriever("example_test_key", None, None)
    assert retriever(block) == "example_test_value"
    expected_repr = "Product retriever for {'name': 'example_test_key', 'type': None, 'creator': None}"
    assert str(retriever) == expected_repr

    # new key with manual metadata and a dict value
    dict_value = {"subKey": "newValue"}
    block.put("new_example_test_key", dict_value, header, metadata)
    assert block["new_example_test_key"] == dict_value
def test_bad_datablock(global_config, dataspace, caplog):  # noqa: F811
    """Putting a non-dict product into each test channel should be logged."""
    for channel_name in _TEST_CHANNEL_NAMES:
        with RunChannel(global_config, channel_name) as task_manager:
            block = datablock.DataBlock(dataspace, channel_name)
            task_manager.data_block_put("bad_string", "header", block)
            task_manager.take_offline()
            assert "data_block put expecting" in caplog.text
def rpc_print_products(self):
    """Return a human-readable report of every channel's modules and the
    data products they produce, rendered as psql-style tables.

    Channels whose worker is no longer alive are reported as being in
    ERROR state and skipped.  Returns the report without its trailing
    newline.
    """
    with self.channel_workers.access() as workers:
        channel_keys = workers.keys()
        if not channel_keys:
            return "No channels are currently active.\n"
        # column width sized to the longest channel name
        width = max(len(x) for x in channel_keys) + 1
        txt = ""
        for ch, worker in workers.items():
            if not worker.is_alive():
                txt += f"Channel {ch} is in ERROR state\n"
                continue
            txt += f"channel: {ch:<{width}}, id = {worker.task_manager.id:<{width}}, state = {worker.get_state_name():<10} \n"
            tm = self.dataspace.get_taskmanager(ch)
            data_block = datablock.DataBlock(
                self.dataspace, ch, taskmanager_id=tm["taskmanager_id"], sequence_id=tm["sequence_id"]
            )
            # steps back one generation -- presumably to read the previous,
            # completed cycle (NOTE(review): confirm against DataBlock semantics)
            data_block.generation_id -= 1
            channel_config = self.channel_config_loader.get_channels()[ch]
            produces = worker.get_produces()
            for i in ("sources", "transforms", "logicengines", "publishers"):
                txt += f"\t{i}:\n"
                modules = channel_config.get(i, {})
                for mod_name in modules.keys():
                    txt += f"\t\t{mod_name}\n"
                    products = produces.get(mod_name, [])
                    for product in products:
                        try:
                            df = data_block[product]
                            # round-trip through JSON to normalize the frame
                            df = pd.read_json(df.to_json())
                            txt += f"{tabulate.tabulate(df, headers='keys', tablefmt='psql')}\n"
                        except Exception as e:  # pragma: no cover
                            # report per-product failures inline; keep going
                            txt += f"\t\t\t{e}\n"
    return txt[:-1]
def test_DataBlock_key_management(dataspace):  # noqa: F811
    """Put/get round-trips, membership tests, and the ProductRetriever interface."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(tm["taskmanager_id"])
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])

    block.put("example_test_key", "example_test_value", header)
    assert "example_test_key" in block.keys()
    assert "example_test_key" in block
    assert block.get("example_test_key") == "example_test_value"

    # product-retriever interface
    retriever = datablock.ProductRetriever("example_test_key", None, None)
    assert retriever(block) == "example_test_value"
    expected_repr = "Product retriever for {'name': 'example_test_key', 'type': None, 'creator': None}"
    assert str(retriever) == expected_repr

    # FIXME: The following behavior should be disallowed for data-integrity reasons!
    # i.e. replacing a product name with a different value.
    replacement = {"subKey": "newValue"}
    block.put("example_test_key", replacement, header)
    assert block["example_test_key"] == replacement
def rpc_print_products(self):
    """Return a formatted report of each channel's modules and their data
    products, rendered as psql-style tables (without a trailing newline).
    """
    # column width sized to the longest channel name
    width = max(len(x) for x in self.task_managers.keys()) + 1
    txt = ""
    for ch, worker in self.task_managers.items():
        sname = TaskManager.STATE_NAMES[worker.task_manager.get_state()]
        txt += "channel: {:<{width}}, id = {:<{width}}, state = {:<10} \n".format(
            ch, worker.task_manager.id, sname, width=width)
        tm = self.dataspace.get_taskmanager(ch)
        data_block = datablock.DataBlock(
            self.dataspace,
            ch,
            taskmanager_id=tm['taskmanager_id'],
            sequence_id=tm['sequence_id'])
        # step back one generation -- presumably the last completed cycle
        # (NOTE(review): confirm against DataBlock semantics)
        data_block.generation_id -= 1
        channel_config = self.config_manager.get_channels()[ch]
        produces = self.config_manager.get_produces(channel_config)
        for i in ("sources", "transforms", "logicengines", "publishers"):
            txt += "\t{}:\n".format(i)
            modules = channel_config.get(i, {})
            # Iterate keys directly: the Python-2-only .iteritems() raises
            # AttributeError on Python 3, and the config value was unused.
            for mod_name in modules:
                txt += "\t\t{}\n".format(mod_name)
                products = produces.get(mod_name, [])
                for product in products:
                    try:
                        df = data_block[product]
                        # round-trip through JSON to normalize the frame
                        df = pd.read_json(df.to_json())
                        txt += "{}\n".format(
                            tabulate.tabulate(df, headers='keys', tablefmt='psql'))
                    except Exception as e:
                        # report per-product failures inline; keep going
                        txt += "\t\t\t{}\n".format(str(e))
    return txt[:-1]
def test_bad_datablock(global_config, dataspace, caplog):  # noqa: F811
    """A non-dict product put into a DataBlock should be reported in the log."""
    with RunChannel(global_config, "test_channel") as task_manager:
        task_manager.state.wait_while(State.ACTIVE)
        block = datablock.DataBlock(dataspace, task_manager.name)
        task_manager.data_block_put("bad_string", "header", block)
        task_manager.take_offline()
        assert "data_block put expecting" in caplog.text
def test_DataBlock_to_str(dataspace):  # noqa: F811
    """str(DataBlock) should be a dict literal describing the block's contents."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    # Build the expectation BEFORE the put, since putting a product can
    # change what the dataspace reports.
    expected = {
        "taskmanager_id": tm["taskmanager_id"],
        "generation_id": dataspace.get_last_generation_id(tm["name"], tm["taskmanager_id"]),
        "sequence_id": len(dataspace.get_dataproducts(tm["sequence_id"])) + 1,
        "keys": ["example_test_key"],
        "dataproducts": {"example_test_key": "example_test_value"},
    }
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put("example_test_key", "example_test_value", datablock.Header(tm["taskmanager_id"]))
    assert ast.literal_eval(str(block)) == expected
def acquire(self):
    """
    Overrides Source class method.

    Fetch the latest data block of the source channel, then pull the
    requested products out of it, retrying up to ``self.retries`` times
    with ``self.retry_to`` seconds between attempts.

    :rtype: :obj:`dict` mapping output key to a :obj:`pandas.DataFrame`
    :raises RuntimeError: if no data block, or not all expected keys,
        could be obtained
    """
    data_block = None
    for _ in range(self.retries):
        try:
            tm = self.dataspace.get_taskmanager(self.source_channel)
            self.logger.debug('task manager %s', tm)
            if tm['taskmanager_id']:
                # get last datablock
                data_block = datablock.DataBlock(
                    self.dataspace,
                    self.source_channel,
                    taskmanager_id=tm['taskmanager_id'],
                    sequence_id=tm['sequence_id'])
                self.logger.debug('data block %s', data_block)
                if data_block and data_block.generation_id:
                    self.logger.debug("DATABLOCK %s", data_block)
                    # This is a valid datablock
                    break
        except Exception as detail:
            self.logger.error('Error getting datablock for %s %s',
                              self.source_channel, detail)
        time.sleep(self.retry_to)
    if not data_block:
        raise RuntimeError('Could not get data.')
    rc = {}
    filled_keys = []
    for _ in range(self.retries):
        if len(filled_keys) != len(self.data_keys):
            for k in self.data_keys:
                # each entry is either a plain key or a (k_in, k_out) pair
                if isinstance(k, (tuple, list)):
                    k_in, k_out = k[0], k[1]
                else:
                    k_in = k_out = k
                if k_in not in filled_keys:
                    try:
                        rc[k_out] = pd.DataFrame(self._get_data(data_block, k_in))
                        # BUG FIX: record k_in, the name tested above.
                        # Appending the whole (k_in, k_out) entry meant the
                        # membership check never matched for pair entries,
                        # so already-fetched keys were re-fetched on every
                        # retry pass.
                        filled_keys.append(k_in)
                    except KeyError as ke:
                        self.logger.debug("KEYERROR %s", ke)
        if len(filled_keys) == len(self.data_keys):
            break
        # expected data is not ready yet
        time.sleep(self.retry_to)
    if len(filled_keys) != len(self.data_keys):
        raise RuntimeError(
            'Could not get all data. Expected {} Filled {}'.format(
                self.data_keys, filled_keys))
    return rc
def test_DataBlock_get_header(dataspace):  # noqa: F811
    """get_header returns the Header object stored alongside a product."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    stored_header = datablock.Header(tm["taskmanager_id"])
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put("example_test_key", "example_test_value", stored_header)
    assert block.get_header("example_test_key") == stored_header
def rpc_query_tool(self, product, format=None, start_time=None):
    """Search all active channels for *product* and return the matching
    data, concatenated across task managers, rendered as a table, CSV, or
    JSON depending on *format*.

    :arg product: product name to search for
    :arg format: one of None (table), "csv", "json"
    :arg start_time: if given, query every task manager since that time
        instead of only the latest one
    """
    with QUERY_TOOL_HISTOGRAM.labels(product).time():
        found = False
        result = pd.DataFrame()
        txt = f"Product {product}: "
        with self.channel_workers.access() as workers:
            for ch, worker in workers.items():
                if not worker.is_alive():
                    txt += f"Channel {ch} is in not active\n"
                    continue
                produces = worker.get_produces()
                r = [x for x in list(produces.items()) if product in x[1]]
                if not r:
                    continue
                found = True
                txt += f" Found in channel {ch}\n"
                if start_time:
                    tms = self.dataspace.get_taskmanagers(
                        ch, start_time=start_time)
                else:
                    tms = [self.dataspace.get_taskmanager(ch)]
                for tm in tms:
                    try:
                        data_block = datablock.DataBlock(
                            self.dataspace,
                            ch,
                            taskmanager_id=tm["taskmanager_id"],
                            sequence_id=tm["sequence_id"])
                        products = data_block.get_dataproducts(product)
                        for p in products:
                            df = p["value"]
                            if df.shape[0] > 0:
                                # annotate each row with its provenance
                                df["channel"] = [tm["name"]] * df.shape[0]
                                df["taskmanager_id"] = [
                                    p["taskmanager_id"]
                                ] * df.shape[0]
                                df["generation_id"] = [p["generation_id"]
                                                       ] * df.shape[0]
                                # pd.concat: DataFrame.append was deprecated
                                # in pandas 1.4 and removed in pandas 2.0
                                result = pd.concat([result, df])
                    except Exception as e:  # pragma: no cover
                        txt += f"\t\t{e}\n"
        if found:
            dataframe_formatter = self._dataframe_to_table
            if format == "csv":
                dataframe_formatter = self._dataframe_to_csv
            if format == "json":
                dataframe_formatter = self._dataframe_to_json
            result = result.reset_index(drop=True)
            txt += dataframe_formatter(result)
        else:
            txt += "Not produced by any module\n"
        return txt
def test_DataBlock_no_key_by_name(dataspace):  # noqa: F811
    """Indexing a DataBlock with an unknown key raises KeyError."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put("example_test_key", "example_test_value", datablock.Header(tm["taskmanager_id"]))
    with pytest.raises(KeyError):
        block["no_such_key_exists"]
def test_DataBlock_mark_expired(dataspace):  # noqa: F811
    """mark_expired is just a stub in this case; a real implementation
    would raise an exception on failure."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put("example_test_key", "example_test_value", datablock.Header(tm["taskmanager_id"]))
    assert block.mark_expired(1) is None
def rpc_print_product(self, product, columns=None, query=None):
    """Locate *product* in the configured channels and return it rendered
    as a psql-style table, optionally restricted to *columns* (a
    comma-separated string) and filtered by a pandas *query* expression.
    """
    found = False
    txt = "Product {}: ".format(product)
    for ch, worker in self.task_managers.items():
        channel_config = self.config_manager.get_channels()[ch]
        produces = self.config_manager.get_produces(channel_config)
        # List comprehension, not filter(): on Python 3 a filter object is
        # always truthy, so `if not r` never fired and every channel was
        # mis-reported as producing the product.
        r = [x for x in produces.items() if product in x[1]]
        if not r:
            continue
        found = True
        txt += " Found in channel {}\n".format(ch)
        tm = self.dataspace.get_taskmanager(ch)
        try:
            data_block = datablock.DataBlock(
                self.dataspace,
                ch,
                taskmanager_id=tm['taskmanager_id'],
                sequence_id=tm['sequence_id'])
            # step back one generation -- presumably the last completed
            # cycle (NOTE(review): confirm against DataBlock semantics)
            data_block.generation_id -= 1
            df = data_block[product]
            # round-trip through JSON to normalize the frame
            df = pd.read_json(df.to_json())
            # apply the optional column selection and row filter in
            # sequence; equivalent to the original four-way branch
            if columns:
                df = df.loc[:, columns.split(",")]
            if query:
                df = df.query(query)
            txt += "{}\n".format(
                tabulate.tabulate(df, headers='keys', tablefmt='psql'))
        except Exception as e:
            # report failures inline; keep scanning other channels
            txt += "\t\t{}\n".format(str(e))
    if not found:
        txt += "Not Found\n"
    return txt[:-1]
def test_DataBlock_get_dataproducts(dataspace):  # noqa: F811
    """get_dataproducts lists every stored product with its key and value."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put("example_test_key", "example_test_value", datablock.Header(tm["taskmanager_id"]))
    products = block.get_dataproducts()
    assert len(products) == 1
    first = products[0]
    assert first["key"] == "example_test_key"
    assert first["value"] == "example_test_value"
def acquire(self):
    """
    Overrides Source class method.

    Fetch the latest data block of the source channel and return the
    requested products from it, retrying up to ``self.max_attempts``
    times with ``self.retry_interval`` seconds between attempts.

    :rtype: :obj:`dict` mapping output key to the retrieved data
    :raises RuntimeError: if no data block, or not all expected keys,
        could be obtained
    """
    data_block = None
    for _ in range(self.max_attempts):
        try:
            tm = self.dataspace.get_taskmanager(self.source_channel)
            self.logger.debug("task manager %s", tm)
            if tm["taskmanager_id"]:
                # get last datablock
                data_block = datablock.DataBlock(
                    self.dataspace,
                    self.source_channel,
                    taskmanager_id=tm["taskmanager_id"],
                    sequence_id=tm["sequence_id"],
                )
                self.logger.debug("data block %s", data_block)
                if data_block and data_block.generation_id:
                    self.logger.debug("DATABLOCK %s", data_block)
                    # This is a valid datablock
                    break
        except Exception as detail:
            self.logger.error("Error getting datablock for %s %s", self.source_channel, detail)
        time.sleep(self.retry_interval)
    if not data_block:
        raise RuntimeError("Could not get data.")
    rc = {}
    filled_keys = []
    for _ in range(self.max_attempts):
        if len(filled_keys) != len(self.data_keys):
            # data_keys maps the key name in the source channel (k_in) to
            # the key name under which it is returned (k_out)
            for k_in, k_out in self.data_keys.items():
                if k_in not in filled_keys:
                    try:
                        rc[k_out] = self._get_data(data_block, k_in)
                        filled_keys.append(k_in)
                    except KeyError as ke:
                        self.logger.debug("KEYERROR %s", ke)
        if len(filled_keys) == len(self.data_keys):
            break
        # expected data is not ready yet
        time.sleep(self.retry_interval)
    if len(filled_keys) != len(self.data_keys):
        raise RuntimeError(
            f"Could not get all data. Expected {self.data_keys} Filled {filled_keys}"
        )
    return rc
def test_DataBlock_key_management_change_name(dataspace):  # noqa: F811
    """Re-putting an existing key replaces its value (see FIXME below)."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(tm["taskmanager_id"])
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put("example_test_key", "example_test_value", header)
    # FIXME: The following behavior should be disallowed for data-integrity reasons!
    # i.e. replacing a product name from datablock.ProductRetriever with a
    # different value.
    replacement = {"subKey": "newValue"}
    block.put("example_test_key", replacement, header)
    assert block["example_test_key"] == replacement
def test_DataBlock_get_metadata(dataspace):  # noqa: F811
    """get_metadata returns the Metadata object stored with a product."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    stored_metadata = datablock.Metadata(
        tm["taskmanager_id"],
        generation_id=dataspace.get_last_generation_id(tm["name"], tm["taskmanager_id"]),
    )
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put(
        "example_test_key",
        "example_test_value",
        datablock.Header(tm["taskmanager_id"]),
        stored_metadata,
    )
    assert block.get_metadata("example_test_key") == stored_metadata
def test_DataBlock_is_expired_with_key(dataspace):  # noqa: F811
    """This test just validates the method/function exists.

    The stub within our default code should be replaced by a class
    inheriting from it; that class should have more rational return types.
    """
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put("example_test_key", "example_test_value", datablock.Header(tm["taskmanager_id"]))
    assert block.is_expired(key="example_test_key") is None
def main():
    """Use as a test unit or as the CLI of this module."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--configtemplate',
                        action='store_true',
                        help='prints the expected module configuration')
    parser.add_argument('--configinfo',
                        action='store_true',
                        help='prints config template along with produces and consumes info')
    args = parser.parse_args()

    # config-printing modes short-circuit the real run
    if args.configtemplate:
        module_config_template()
        return
    if args.configinfo:
        module_config_info()
        return

    config_manager = configmanager.ConfigManager()
    config_manager.load()
    global_config = config_manager.get_global_config()
    print("GLOBAL CONF", global_config)
    ds = dataspace.DataSpace(global_config)
    data_block = datablock.DataBlock(
        ds,
        # '5CC840DD-88B9-45CE-9DA2-FF531289AC66',
        'C56E0AAF-99D3-42A8-88A3-921E30C1879C',
        1)
    publisher = AWSFOMPublisher({
        "publish_to_graphite": True,
        "graphite_host": "fifemondata.fnal.gov",
        "graphite_port": 2104,
        "graphite_context": "hepcloud.aws",
        "output_file": "%s/de_data/AWS_figure_of_merit.csv" % (os.environ.get('HOME'), )
    })
    publisher.publish(data_block)
def rpc_print_products(self):
    """Return a human-readable report of every channel's modules and the
    data products they produce, rendered as psql-style tables.

    Channels whose worker is no longer alive are reported as being in
    ERROR state and skipped.  Returns the report without its trailing
    newline.
    """
    with self.workers.access() as workers:
        channel_keys = workers.keys()
        if not channel_keys:
            return "No channels are currently active.\n"
        # column width sized to the longest channel name
        width = max([len(x) for x in channel_keys]) + 1
        txt = ""
        for ch, worker in workers.items():
            if not worker.is_alive():
                txt += f"Channel {ch} is in ERROR state\n"
                continue
            txt += "channel: {:<{width}}, id = {:<{width}}, state = {:<10} \n".format(
                ch, worker.task_manager_id, worker.get_state_name(), width=width)
            tm = self.dataspace.get_taskmanager(ch)
            data_block = datablock.DataBlock(
                self.dataspace,
                ch,
                taskmanager_id=tm['taskmanager_id'],
                sequence_id=tm['sequence_id'])
            # steps back one generation -- presumably to read the previous,
            # completed cycle (NOTE(review): confirm against DataBlock semantics)
            data_block.generation_id -= 1
            channel_config = self.channel_config_loader.get_channels()[ch]
            produces = self.channel_config_loader.get_produces(
                channel_config)
            for i in ("sources", "transforms", "logicengines", "publishers"):
                txt += "\t{}:\n".format(i)
                modules = channel_config.get(i, {})
                for mod_name, mod_config in modules.items():
                    txt += "\t\t{}\n".format(mod_name)
                    products = produces.get(mod_name, [])
                    for product in products:
                        try:
                            df = data_block[product]
                            # round-trip through JSON to normalize the frame
                            df = pd.read_json(df.to_json())
                            txt += "{}\n".format(
                                tabulate.tabulate(df, headers='keys', tablefmt='psql'))
                        except Exception as e:
                            # report per-product failures inline; keep going
                            txt += "\t\t\t{}\n".format(e)
    return txt[:-1]
def test_DataBlock_duplicate(dataspace):  # noqa: F811
    """duplicate() copies keys/values; the copy sits one generation behind."""
    tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    block = datablock.DataBlock(dataspace, tm["name"], tm["taskmanager_id"])
    block.put("example_test_key", "example_test_value", datablock.Header(tm["taskmanager_id"]))
    copy = block.duplicate()
    assert block.taskmanager_id == copy.taskmanager_id
    assert block.generation_id == copy.generation_id + 1
    assert block.sequence_id == copy.sequence_id
    assert block._keys == copy._keys
    for key in block._keys:
        assert block[key] == copy[key]
def __init__(self, name, task_manager_id, generation_id, channel_dict, global_config):
    """
    Set up a task manager: dataspace connection, the initial (t0) data
    block, the channel's workers, and shared run-state primitives.

    :type name: :obj:`str`
    :arg name: name of the channel this task manager runs
    :type task_manager_id: :obj:`int`
    :arg task_manager_id: Task Manager id provided by caller
    :type generation_id: :obj:`int`
    :arg generation_id: generation id used for the initial data block
    :type channel_dict: :obj:`dict`
    :arg channel_dict: channel configuration
    :type global_config: :obj:`dict`
    :arg global_config: global configuration
    """
    self.dataspace = dataspace.DataSpace(global_config)
    self.data_block_t0 = datablock.DataBlock(self.dataspace,
                                             name,
                                             task_manager_id,
                                             generation_id)  # my current data block
    self.name = name
    self.id = task_manager_id
    self.channel = Channel(channel_dict)
    # run state shared with child processes
    self.state = multiprocessing.Value('i', BOOT)
    self.decision_cycle_active = False
    self.lock = threading.Lock()
    self.logger = de_logger.get_logger()
    self.stop = False  # stop running all loops when this is True
def main():
    """Use as a test unit or as the CLI of this module."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--configtemplate',
                        action='store_true',
                        help='prints the expected module configuration')
    parser.add_argument(
        '--configinfo',
        action='store_true',
        help='prints config template along with produces and consumes info')
    args = parser.parse_args()
    if args.configtemplate:
        module_config_template()
    elif args.configinfo:
        module_config_info()
    else:
        config_manager = configmanager.ConfigManager()
        config_manager.load()
        global_config = config_manager.get_global_config()
        # print() calls: the Python-2 print statements were a SyntaxError
        # on Python 3.
        print("GLOBAL CONF", global_config)
        ds = dataspace.DataSpace(global_config)
        data_block = datablock.DataBlock(
            ds,
            "AWS_Calculations_with_source_proxy",
            "F70B4110-E66D-49CA-9333-4A983A679F37",
            1,
            109)
        fm_info = FigureOfMerit()
        rc = fm_info.transform(data_block)
        print("INFO")
        print(rc)
def __init__(self, name, workers, dataspace, expected_products, exchange, broker_url, queue_info):
    """
    :type name: :obj:`str`
    :arg name: Name of channel corresponding to this task manager
    :type workers: :obj:`dict`
    :arg workers: worker objects keyed by role ("sources", "transforms",
        "logic_engine", "publishers")
    :arg dataspace: dataspace used to create this channel's t0 data block
    :arg expected_products: products the source-product cache waits for
    :arg exchange: message-broker exchange used together with *broker_url*
    :type broker_url: :obj:`str`
    :arg broker_url: URL the kombu Connection is opened against
    :arg queue_info: iterable of pairs whose second element is a routing
        key (first element presumably the queue name -- TODO confirm)
    """
    self.name = name
    self.state = ProcessingState()
    # log level shared with child processes
    self.loglevel = multiprocessing.Value("i", logging.WARNING)
    self.id = str(uuid.uuid4()).upper()  # unique id for this task-manager instance
    self.data_block_t0 = datablock.DataBlock(dataspace, name, self.id, 1)  # my current data block
    self.logger = structlog.getLogger(CHANNELLOGGERNAME)
    self.logger = self.logger.bind(module=__name__.split(".")[-1], channel=self.name)

    # The DE owns the sources
    self.source_workers = workers["sources"]
    self.transform_workers = workers["transforms"]
    self.logic_engine = workers["logic_engine"]
    self.publisher_workers = workers["publishers"]

    self.exchange = exchange
    self.broker_url = broker_url
    self.connection = Connection(self.broker_url)
    self.source_product_cache = SourceProductCache(expected_products, self.logger)
    self.queue_info = queue_info
    self.routing_keys = [info[1] for info in self.queue_info]
def acquire(self):
    """
    Overrides Source class method.

    Retrieve the latest data block for the source channel, retrying up to
    ``self.retries`` times with ``self.retry_to`` seconds between attempts.
    """
    retry_cnt = 0
    data_block = None
    while retry_cnt < self.retries:
        try:
            tm = self.dataspace.get_taskmanager(self.source_channel)
            # lazy %-args: formatting is skipped when DEBUG is disabled
            self.logger.debug('task manager %s', tm)
            if tm['taskmanager_id']:
                # get last datablock
                data_block = datablock.DataBlock(
                    self.dataspace,
                    self.source_channel,
                    taskmanager_id=tm['taskmanager_id'],
                    sequence_id=tm['sequence_id'])
                break
            retry_cnt += 1
            time.sleep(self.retry_to)
        except Exception as detail:
            # Python 3 `except ... as` syntax: the Python-2 comma form was a
            # SyntaxError on Python 3.  Failed attempts are also counted here
            # now -- previously an exception skipped the increment, so a
            # persistent error looped forever.
            self.logger.error('Error getting datablock for %s %s',
                              self.source_channel, detail)
            retry_cnt += 1
            time.sleep(self.retry_to)
def rpc_print_product(self, product, columns=None, query=None, types=False, format=None):
    """Locate *product* across active channels and return it rendered in
    the requested *format* (table by default; 'vertical', 'column-names',
    or 'json'), optionally restricted to *columns* (comma-separated),
    filtered by a pandas *query* expression, and, when *types* is true,
    annotated with a per-column value-type column.
    """
    found = False
    txt = "Product {}: ".format(product)
    with self.workers.access() as workers:
        for ch, worker in workers.items():
            if not worker.is_alive():
                txt += f"Channel {ch} is in not active\n"
                continue
            produces = worker.get_produces()
            r = [x for x in list(produces.items()) if product in x[1]]
            if not r:
                continue
            found = True
            txt += " Found in channel {}\n".format(ch)
            tm = self.dataspace.get_taskmanager(ch)
            try:
                data_block = datablock.DataBlock(
                    self.dataspace,
                    ch,
                    taskmanager_id=tm['taskmanager_id'],
                    sequence_id=tm['sequence_id'])
                # steps back one generation -- presumably to read the previous,
                # completed cycle (NOTE(review): confirm against DataBlock semantics)
                data_block.generation_id -= 1
                df = data_block[product]
                # round-trip through JSON to normalize the frame
                df = pd.read_json(df.to_json())
                dataframe_formatter = self._dataframe_to_table
                if format == 'vertical':
                    dataframe_formatter = self._dataframe_to_vertical_tables
                if format == 'column-names':
                    dataframe_formatter = self._dataframe_to_column_names
                if format == 'json':
                    dataframe_formatter = self._dataframe_to_json
                if types:
                    # insert a "<col>.type" column right after each column
                    for column in df.columns:
                        df.insert(
                            df.columns.get_loc(column) + 1,
                            f"{column}.type",
                            df[column].transform(
                                lambda x: type(x).__name__))
                column_names = []
                if columns:
                    column_names = columns.split(",")
                if query:
                    if column_names:
                        txt += dataframe_formatter(
                            df.loc[:, column_names].query(query))
                    else:
                        txt += dataframe_formatter(df.query(query))
                else:
                    if column_names:
                        txt += dataframe_formatter(df.loc[:, column_names])
                    else:
                        txt += dataframe_formatter(df)
            except Exception as e:  # pragma: no cover
                # report failures inline; keep scanning other channels
                txt += "\t\t{}\n".format(e)
    if not found:
        txt += "Not produced by any module\n"
    return txt[:-1]
def rpc_print_product(self, product, columns=None, query=None, types=False, format=None):
    """Locate *product* across active channels and return it rendered in
    the requested *format* (table by default; 'vertical', 'column-names',
    or 'json'), optionally restricted to *columns* (comma-separated),
    filtered by a pandas *query* expression, and, when *types* is true,
    annotated with a per-column value-type column.

    :raises ValueError: if *product* is not a string
    """
    if not isinstance(product, str):
        raise ValueError(
            f"Requested product should be a string not {type(product)}")
    found = False
    txt = f"Product {product}: "
    with self.channel_workers.access() as workers:
        for ch, worker in workers.items():
            if not worker.is_alive():
                txt += f"Channel {ch} is in not active\n"
                self.logger.debug(
                    f"Channel:{ch} is in not active when running rpc_print_product"
                )
                continue
            produces = worker.get_produces()
            r = [x for x in list(produces.items()) if product in x[1]]
            if not r:
                continue
            found = True
            txt += f" Found in channel {ch}\n"
            self.logger.debug(
                f"Found channel:{ch} active when running rpc_print_product"
            )
            tm = self.dataspace.get_taskmanager(ch)
            self.logger.debug(
                f"rpc_print_product - channel:{ch} taskmanager:{tm}")
            try:
                data_block = datablock.DataBlock(
                    self.dataspace,
                    ch,
                    taskmanager_id=tm["taskmanager_id"],
                    sequence_id=tm["sequence_id"])
                # steps back one generation -- presumably to read the previous,
                # completed cycle (NOTE(review): confirm against DataBlock semantics)
                data_block.generation_id -= 1
                df = data_block[product]
                dfj = df.to_json()
                self.logger.debug(
                    f"rpc_print_product - channel:{ch} task manager:{tm} datablock:{dfj}"
                )
                # round-trip through JSON to normalize the frame
                df = pd.read_json(dfj)
                dataframe_formatter = self._dataframe_to_table
                if format == "vertical":
                    dataframe_formatter = self._dataframe_to_vertical_tables
                if format == "column-names":
                    dataframe_formatter = self._dataframe_to_column_names
                if format == "json":
                    dataframe_formatter = self._dataframe_to_json
                if types:
                    # insert a "<col>.type" column right after each column
                    for column in df.columns:
                        df.insert(
                            df.columns.get_loc(column) + 1,
                            f"{column}.type",
                            df[column].transform(
                                lambda x: type(x).__name__),
                        )
                column_names = []
                if columns:
                    column_names = columns.split(",")
                if query:
                    if column_names:
                        txt += dataframe_formatter(
                            df.loc[:, column_names].query(query))
                    else:
                        txt += dataframe_formatter(df.query(query))
                else:
                    if column_names:
                        txt += dataframe_formatter(df.loc[:, column_names])
                    else:
                        txt += dataframe_formatter(df)
            except Exception as e:  # pragma: no cover
                # report failures inline; keep scanning other channels
                txt += f"\t\t{e}\n"
    if not found:
        txt += "Not produced by any module\n"
    return txt[:-1]