def start_all_cores(self, executable_targets, app_id, txrx,
                    sync_state_changes):
    """ Send the sync signal that starts the application and check that
        the cores have reacted to it.

    :param executable_targets: the mapping between cores and binaries;
        only its total_processors and all_core_subsets are read here
    :param app_id: the app id that being used by the simulation
    :param txrx: the python interface to the spinnaker machine
    :param sync_state_changes: the number of runs been done between setup
        and end; its parity selects SYNC0 or SYNC1
    :return: None
    :raise ExecutableFailedToStartException: if some cores are neither
        running nor already waiting in the following sync state
    """
    total_processors = executable_targets.total_processors
    all_core_subsets = executable_targets.all_core_subsets

    # the signal to send alternates with the parity of the run counter
    if sync_state_changes % 2 == 0:
        sync_state = SCPSignal.SYNC0
    else:
        sync_state = SCPSignal.SYNC1

    # start applications
    logger.info("Starting application")
    txrx.send_signal(app_id, sync_state)
    sync_state_changes += 1

    # check all apps have gone into run state
    logger.info("Checking that the application has started")
    processors_running = txrx.get_core_state_count(
        app_id, CPUState.RUNNING)
    if processors_running >= total_processors:
        return

    # deduce the state selected by the incremented counter; cores counted
    # in it are treated below as having finished already
    if sync_state_changes % 2 == 0:
        sync_state = CPUState.SYNC0
    else:
        sync_state = CPUState.SYNC1
    processors_finished = txrx.get_core_state_count(app_id, sync_state)

    if processors_running + processors_finished >= total_processors:
        # Logger.warn is a deprecated alias; warning() is the supported name
        logger.warning("some processors finished between signal "
                       "transmissions. Could be a sign of an error")
        return

    unsuccessful_cores = helpful_functions.get_cores_not_in_state(
        all_core_subsets, CPUState.RUNNING, txrx)

    # Last chance to get out of the error state: the counts above and this
    # detailed query are separate reads, so the cores may have caught up
    if len(unsuccessful_cores) == 0:
        return

    break_down = helpful_functions.get_core_status_string(
        unsuccessful_cores)
    raise exceptions.ExecutableFailedToStartException(
        "Only {} of {} processors started:{}".format(
            processors_running, total_processors, break_down))
def wait_for_cores_to_be_ready(
        self, executable_targets, app_id, txrx, no_sync_state_changes):
    """ Wait until no core is left in C_MAIN, then verify that every core
        sits in the sync state selected by the run counter.

    :param executable_targets: the mapping between cores and binaries
    :param app_id: the app id that being used by the simulation
    :param txrx: the python interface to the spinnaker machine
    :param no_sync_state_changes: the number of runs been done between
        setup and end; even selects SYNC0, odd selects SYNC1
    :return: None
    :raise ExecutableFailedToStartException: if the count of cores in the
        expected state differs from the total and the follow-up query
        still reports cores outside that state
    """
    expected_count = executable_targets.total_processors
    core_subsets = executable_targets.all_core_subsets

    # poll every 0.1s until nothing is reported in C_MAIN any more
    while txrx.get_core_state_count(app_id, CPUState.C_MAIN) != 0:
        time.sleep(0.1)

    # the expected sync state alternates with the parity of the counter
    target_state = (
        CPUState.SYNC0 if no_sync_state_changes % 2 == 0
        else CPUState.SYNC1)

    ready_count = txrx.get_core_state_count(app_id, target_state)
    if ready_count == expected_count:
        return

    stragglers = helpful_functions.get_cores_not_in_state(
        core_subsets, target_state, txrx)

    # last chance to slip out of error check
    if len(stragglers) == 0:
        return

    status_text = helpful_functions.get_core_status_string(stragglers)
    template = (
        "Only {} processors out of {} have successfully reached "
        "{}:{}")
    raise exceptions.ExecutableFailedToStartException(
        template.format(
            ready_count, expected_count, target_state.name, status_text))
def start_all_cores(executable_targets, app_id, txrx, sync_state_changes):
    """ Send the sync signal that starts the application and check that
        every core is either running or already paused.

    :param executable_targets: the mapping between cores and binaries;
        only its total_processors and all_core_subsets are read here
    :param app_id: the app id that being used by the simulation
    :param txrx: the python interface to the spinnaker machine
    :param sync_state_changes: the number of runs been done between setup
        and end; its parity selects SYNC0 or SYNC1
    :return: None
    :raise ExecutableFailedToStartException: if some cores are in neither
        the RUNNING nor the PAUSED state
    """
    total_processors = executable_targets.total_processors
    all_core_subsets = executable_targets.all_core_subsets

    # the signal to send alternates with the parity of the run counter
    if sync_state_changes % 2 == 0:
        sync_state = SCPSignal.SYNC0
    else:
        sync_state = SCPSignal.SYNC1

    # start applications (the local counter is not incremented here: the
    # caller's value could not be affected and nothing below reads it)
    logger.info("Starting application ({})".format(sync_state))
    txrx.send_signal(app_id, sync_state)

    # check all apps have gone into run state
    logger.info("Checking that the application has started")
    processors_running = txrx.get_core_state_count(app_id, CPUState.RUNNING)
    if processors_running >= total_processors:
        return

    processors_finished = txrx.get_core_state_count(
        app_id, CPUState.PAUSED)
    if processors_running + processors_finished >= total_processors:
        # Logger.warn is a deprecated alias; warning() is the supported name
        logger.warning("some processors finished between signal "
                       "transmissions. Could be a sign of an error")
        return

    unsuccessful_cores = helpful_functions.get_cores_not_in_state(
        all_core_subsets, {CPUState.RUNNING, CPUState.PAUSED}, txrx)

    # Last chance to get out of error state
    if len(unsuccessful_cores) > 0:
        break_down = helpful_functions.get_core_status_string(
            unsuccessful_cores)
        raise exceptions.ExecutableFailedToStartException(
            "Only {} of {} processors started:{}".format(
                processors_running, total_processors, break_down),
            helpful_functions.get_core_subsets(unsuccessful_cores))
def wait_for_cores_to_be_ready(executable_targets, app_id, txrx,
                               no_sync_state_changes):
    """ Wait until no core is left in C_MAIN, then verify that every core
        sits in the sync state selected by the run counter.

    :param executable_targets: the mapping between cores and binaries
    :param app_id: the app id that being used by the simulation
    :param txrx: the python interface to the spinnaker machine
    :param no_sync_state_changes: the number of runs been done between
        setup and end; even selects SYNC0, odd selects SYNC1
    :return: None
    :raise ExecutableFailedToStartException: if the count of cores in the
        expected state differs from the total and the follow-up query
        still reports cores outside that state; the exception also
        carries the core subsets of those cores
    """
    wanted = executable_targets.total_processors
    subsets = executable_targets.all_core_subsets

    # keep polling (0.1s apart) while anything is still reported in C_MAIN
    still_in_c_main = txrx.get_core_state_count(app_id, CPUState.C_MAIN)
    while still_in_c_main != 0:
        time.sleep(0.1)
        still_in_c_main = txrx.get_core_state_count(
            app_id, CPUState.C_MAIN)

    # parity of the counter picks which sync state the cores should be in
    use_sync0 = no_sync_state_changes % 2 == 0
    expected_state = CPUState.SYNC0 if use_sync0 else CPUState.SYNC1

    in_state = txrx.get_core_state_count(app_id, expected_state)
    if in_state == wanted:
        return

    not_ready = helpful_functions.get_cores_not_in_state(
        subsets, expected_state, txrx)

    # last chance to slip out of error check
    if len(not_ready) == 0:
        return

    details = helpful_functions.get_core_status_string(not_ready)
    message = (
        "Only {} processors out of {} have successfully reached "
        "{}:{}".format(in_state, wanted, expected_state.name, details))
    raise exceptions.ExecutableFailedToStartException(
        message, helpful_functions.get_core_subsets(not_ready))
def spinnaker_based_data_specification_execution(
        self, write_memory_map_report, dsg_targets, transceiver,
        dse_app_id, app_id):
    """ Copy each data specification into SDRAM, run the on-chip Data
        Specification Executor on the target cores and wait for it to
        finish.

    :param write_memory_map_report: value packed as the fourth field of
        the per-core structure (store data for the memory map report)
    :param dsg_targets: mapping from (x, y, p, label) to the path of the
        data specification file for that core
    :param transceiver: the python interface to the spinnaker machine
    :param dse_app_id: app id used for the SDRAM allocated here and
        stopped once the executor is done
    :param app_id: app id packed into the structure as the future
        application ID, and the id the executor binary is flooded under
    :return: dict holding the "LoadedApplicationDataToken" flag
    :raise Exception: if any target core is found in RUN_TIME_EXCEPTION
        while waiting for the executor
    """
    # create a progress bar for end users
    progress_bar = ProgressBar(
        len(dsg_targets), "Loading data specifications")

    core_subset = CoreSubsets()

    # distinct cores handed a specification; the wait loop further down
    # needs to know how many cores have to reach FINISHED
    cores_used = set()

    for (x, y, p, label) in dsg_targets:
        core_subset.add_processor(x, y, p)
        cores_used.add((x, y, p))

        dse_data_struct_address = transceiver.malloc_sdram(
            x, y, constants.DSE_DATA_STRUCT_SIZE, dse_app_id)

        data_spec_file_path = dsg_targets[x, y, p, label]
        data_spec_file_size = os.path.getsize(data_spec_file_path)
        application_data_file_reader = FileDataReader(data_spec_file_path)

        base_address = transceiver.malloc_sdram(
            x, y, data_spec_file_size, dse_app_id)

        dse_data_struct_data = struct.pack(
            "<4I", base_address, data_spec_file_size, app_id,
            write_memory_map_report)

        transceiver.write_memory(
            x, y, dse_data_struct_address, dse_data_struct_data,
            len(dse_data_struct_data))

        transceiver.write_memory(
            x, y, base_address, application_data_file_reader,
            data_spec_file_size)

        # data spec file is written at specific address (base_address)
        # this is encapsulated in a structure with four fields:
        # 1 - data specification base address
        # 2 - data specification file size
        # 3 - future application ID
        # 4 - store data for memory map report (True / False)
        # If the memory map report is going to be produced, the
        # address of the structure is returned in user1
        user_0_address = transceiver.\
            get_user_0_register_address_from_core(x, y, p)

        transceiver.write_memory(
            x, y, user_0_address, dse_data_struct_address, 4)

        progress_bar.update()
    progress_bar.end()

    number_of_cores_used = len(cores_used)

    # Execute the DSE on all the cores
    logger.info("Loading the Data Specification Executor")
    dse_exec = os.path.join(
        os.path.dirname(spec_sender.__file__),
        'data_specification_executor.aplx')
    file_reader = FileDataReader(dse_exec)
    size = os.stat(dse_exec).st_size

    transceiver.execute_flood(core_subset, file_reader, app_id, size)

    logger.info(
        "Waiting for On-chip Data Specification Executor to complete")

    # NOTE(review): the executor is flooded under app_id, so its cores are
    # polled under app_id as well; polling dse_app_id would presumably
    # never see them - confirm against the transceiver semantics
    processors_exited = transceiver.get_core_state_count(
        app_id, CPUState.FINISHED)
    while processors_exited < number_of_cores_used:
        processors_errored = transceiver.get_core_state_count(
            app_id, CPUState.RUN_TIME_EXCEPTION)
        if processors_errored > 0:
            error_cores = helpful_functions.get_cores_in_state(
                core_subset, CPUState.RUN_TIME_EXCEPTION, transceiver)
            # the count may include cores outside core_subset, so only
            # fail when one of the target cores is actually affected
            if len(error_cores) > 0:
                error = helpful_functions.get_core_status_string(
                    error_cores)
                raise Exception(
                    "Data Specification Execution has failed: {}".format(
                        error))
        time.sleep(1)
        processors_exited = transceiver.get_core_state_count(
            app_id, CPUState.FINISHED)

    transceiver.stop_application(dse_app_id)
    logger.info("On-chip Data Specification Executor completed")

    return {"LoadedApplicationDataToken": True}
def wait_for_execution_to_complete(
        self, executable_targets, app_id, runtime, time_scaling, txrx,
        buffer_manager, no_sync_state_changes):
    """ Sleep for the expected run duration, poll until no core is
        RUNNING, then check that all cores reached the next sync state.

    :param executable_targets: provides total_processors and
        all_core_subsets
    :param app_id: the app id whose cores are being watched
    :param runtime: run length; presumably in milliseconds, as it is
        divided by 1000 to obtain the seconds to wait
    :param time_scaling: multiplier applied to runtime before waiting
    :param txrx: the python interface to the spinnaker machine
    :param buffer_manager: not used by this method
    :param no_sync_state_changes: the number of runs been done between
        setup and end; odd selects SYNC0, even selects SYNC1
    :return: None
    :raise ExecutableFailedToStopException: if a core is in
        RUN_TIME_EXCEPTION, or if fewer cores than expected are in the
        selected sync state once nothing is running
    """
    core_total = executable_targets.total_processors
    subsets = executable_targets.all_core_subsets

    # one extra second is added on top of the scaled run time
    expected_duration = ((runtime * time_scaling) / 1000.0) + 1.0
    logger.info(
        "Application started - waiting {} seconds for it to stop".format(
            expected_duration))
    time.sleep(expected_duration)

    still_running = core_total
    while still_running != 0:
        rte_count = txrx.get_core_state_count(
            app_id, CPUState.RUN_TIME_EXCEPTION)
        if rte_count > 0:
            failed = helpful_functions.get_cores_in_state(
                subsets, CPUState.RUN_TIME_EXCEPTION, txrx)
            failure_text = helpful_functions.get_core_status_string(failed)
            raise exceptions.ExecutableFailedToStopException(
                "{} cores have gone into a run time error state:{}".format(
                    rte_count, failure_text))

        still_running = txrx.get_core_state_count(
            app_id, CPUState.RUNNING)
        if still_running > 0:
            logger.info(
                "Simulation still not finished or failed - waiting a bit "
                "longer...")
            time.sleep(0.5)

    # parity of the counter picks the state the cores should now be in
    exit_state = (
        CPUState.SYNC0 if no_sync_state_changes % 2 == 1
        else CPUState.SYNC1)

    exited = txrx.get_core_state_count(app_id, exit_state)
    if exited < core_total:
        laggards = helpful_functions.get_cores_not_in_state(
            subsets, exit_state, txrx)
        laggard_text = helpful_functions.get_core_status_string(laggards)
        raise exceptions.ExecutableFailedToStopException(
            "{} of {} processors failed to exit successfully:{}".format(
                core_total - exited, core_total, laggard_text))

    logger.info("Application has run to completion")
def __call__(self, app_id, txrx, executable_targets, has_ran):
    """ Ask every core of the simulation to stop, repeating the request
        until all of them report the FINISHED state.

    :param app_id: the app id whose cores are being shut down
    :param txrx: the python interface to the spinnaker machine
    :param executable_targets: provides total_processors and
        all_core_subsets
    :param has_ran: token that must be truthy for the shutdown to proceed
    :return: None
    :raise ConfigurationException: if has_ran is not set
    :raise ExecutableFailedToStopException: if any core is found in the
        RUN_TIME_EXCEPTION or WATCHDOG state while shutting down
    """
    if not has_ran:
        raise exceptions.ConfigurationException(
            "The ran token is not set correctly, please fix and try again")

    total_processors = executable_targets.total_processors
    all_core_subsets = executable_targets.all_core_subsets

    progress_bar = ProgressBar(
        total_processors,
        "Turning off all the cores within the simulation")

    # the stop command payload is the same for every core, so build it once
    byte_data = struct.pack(
        "<I", constants.SDP_RUNNING_MESSAGE_CODES.SDP_STOP_ID_CODE.value)
    stop_port = constants.SDP_PORTS.RUNNING_COMMAND_SDP_PORT.value

    # check how many processors are already in the finished state
    processors_finished = txrx.get_core_state_count(
        app_id, CPUState.FINISHED)
    finished_cores = processors_finished

    while processors_finished != total_processors:

        # advance the bar by the number of newly finished cores (the
        # operands were previously swapped, giving a negative amount)
        if processors_finished > finished_cores:
            progress_bar.update(processors_finished - finished_cores)
            finished_cores = processors_finished

        processors_rte = txrx.get_core_state_count(
            app_id, CPUState.RUN_TIME_EXCEPTION)
        processors_watchdogged = txrx.get_core_state_count(
            app_id, CPUState.WATCHDOG)

        if processors_rte > 0 or processors_watchdogged > 0:
            error_cores = helpful_functions.get_cores_in_state(
                all_core_subsets,
                {CPUState.RUN_TIME_EXCEPTION, CPUState.WATCHDOG}, txrx)
            fail_message = helpful_functions.get_core_status_string(
                error_cores)
            raise exceptions.ExecutableFailedToStopException(
                "{} of {} processors went into an error state when"
                " shutting down: {}".format(
                    processors_rte + processors_watchdogged,
                    total_processors, fail_message),
                helpful_functions.get_core_subsets(error_cores), True)

        successful_cores_finished = set(
            helpful_functions.get_cores_in_state(
                all_core_subsets, CPUState.FINISHED, txrx))

        all_cores = set(all_core_subsets)
        unsuccessful_cores = all_cores - successful_cores_finished

        # (re)send the stop command to every core not yet seen as finished
        for core_subset in unsuccessful_cores:
            for processor in core_subset.processor_ids:
                txrx.send_sdp_message(
                    SDPMessage(
                        sdp_header=SDPHeader(
                            flags=SDPFlag.REPLY_NOT_EXPECTED,
                            destination_port=stop_port,
                            destination_cpu=processor,
                            destination_chip_x=core_subset.x,
                            destination_chip_y=core_subset.y),
                        data=byte_data))

        processors_finished = txrx.get_core_state_count(
            app_id, CPUState.FINISHED)

    progress_bar.end()
def wait_for_execution_to_complete(
        self, executable_targets, app_id, runtime, time_scaling, txrx,
        time_threshold):
    """ Sleep for the expected run duration, poll until no core is
        RUNNING or the time threshold is overrun, then check that all
        cores reached the PAUSED state.

    :param executable_targets: provides total_processors and
        all_core_subsets
    :param app_id: the app id whose cores are being watched
    :param runtime: run length; presumably in milliseconds, as it is
        divided by 1000 to obtain the seconds to wait
    :param time_scaling: multiplier applied to runtime before waiting
    :param txrx: the python interface to the spinnaker machine
    :param time_threshold: passed with the polling start time to
        self._has_overrun to decide when to give up polling
    :return: None
    :raise ExecutableFailedToStopException: if a core enters
        RUN_TIME_EXCEPTION or WATCHDOG, if cores are still running when
        polling stops, or if not every core ends up PAUSED
    """
    total_processors = executable_targets.total_processors
    all_core_subsets = executable_targets.all_core_subsets

    time_to_wait = ((runtime * time_scaling) / 1000.0) + 0.1
    logger.info(
        "Application started - waiting {} seconds for it to stop".format(
            time_to_wait))
    time.sleep(time_to_wait)

    processors_not_finished = total_processors
    start_time = time.time()

    # failures while reading the state are tolerated up to 10 times in total
    retries = 0
    while (processors_not_finished != 0 and
            not self._has_overrun(start_time, time_threshold)):
        try:
            processors_rte = txrx.get_core_state_count(
                app_id, CPUState.RUN_TIME_EXCEPTION)
            processors_wdog = txrx.get_core_state_count(
                app_id, CPUState.WATCHDOG)
            if processors_rte > 0 or processors_wdog > 0:
                error_cores = helpful_functions.get_cores_in_state(
                    all_core_subsets,
                    {CPUState.RUN_TIME_EXCEPTION, CPUState.WATCHDOG}, txrx)
                break_down = helpful_functions.get_core_status_string(
                    error_cores)
                # count both kinds of failure, matching the states queried
                raise exceptions.ExecutableFailedToStopException(
                    "{} cores have gone into an error state:"
                    "{}".format(
                        processors_rte + processors_wdog, break_down),
                    helpful_functions.get_core_subsets(error_cores), True)

            processors_not_finished = txrx.get_core_state_count(
                app_id, CPUState.RUNNING)
            if processors_not_finished > 0:
                logger.info("Simulation still not finished or failed - "
                            "waiting a bit longer...")
                time.sleep(0.5)
        except exceptions.ExecutableFailedToStopException:
            # a detected error state is a definite failure; it must not be
            # mistaken for a failed read and retried
            raise
        except Exception:
            retries += 1
            if retries >= 10:
                logger.error("Error getting state")
                raise
            logger.info("Error getting state - retrying...")
            time.sleep(0.5)

    if processors_not_finished != 0:
        running_cores = helpful_functions.get_cores_in_state(
            all_core_subsets, CPUState.RUNNING, txrx)
        if len(running_cores) > 0:
            raise exceptions.ExecutableFailedToStopException(
                "Simulation did not finish within the time allocated. "
                "Please try increasing the machine time step and / "
                "or time scale factor in your simulation.",
                helpful_functions.get_core_subsets(running_cores), False)

    processors_exited = txrx.get_core_state_count(app_id, CPUState.PAUSED)

    if processors_exited < total_processors:
        unsuccessful_cores = helpful_functions.get_cores_not_in_state(
            all_core_subsets, CPUState.PAUSED, txrx)

        # Last chance to get out of the error state
        if len(unsuccessful_cores) > 0:
            break_down = helpful_functions.get_core_status_string(
                unsuccessful_cores)
            raise exceptions.ExecutableFailedToStopException(
                "{} of {} processors failed to exit successfully:"
                "{}".format(
                    total_processors - processors_exited,
                    total_processors, break_down),
                helpful_functions.get_core_subsets(unsuccessful_cores),
                True)

    logger.info("Application has run to completion")
def __call__(self, app_id, txrx, executable_targets, no_sync_changes,
             has_ran):
    """ Ask every core of the simulation to stop, repeating the request
        until all of them report CPU_STATE_13, then send the sync signal
        selected by the run counter.

    :param app_id: the app id whose cores are being shut down
    :param txrx: the python interface to the spinnaker machine
    :param executable_targets: provides total_processors and
        all_core_subsets
    :param no_sync_changes: run counter; even selects the SYNC0 signal,
        odd selects SYNC1
    :param has_ran: token that must be truthy for the shutdown to proceed
    :return: None
    :raise ConfigurationException: if has_ran is not set
    :raise ExecutableFailedToStopException: if any core is found in the
        RUN_TIME_EXCEPTION or WATCHDOG state while shutting down
    """
    if not has_ran:
        raise exceptions.ConfigurationException(
            "The ran token is not set correctly, please fix and try again")

    total_processors = executable_targets.total_processors
    all_core_subsets = executable_targets.all_core_subsets

    # reset the state to the old state so that it can be used by the
    # application runner code
    if no_sync_changes % 2 == 0:
        sync_state = SCPSignal.SYNC0
    else:
        sync_state = SCPSignal.SYNC1

    progress_bar = ProgressBar(
        total_processors,
        "Turning off all the cores within the simulation")

    # the stop command payload is the same for every core, so build it once
    byte_data = struct.pack(
        "<I", constants.SDP_RUNNING_MESSAGE_CODES.SDP_STOP_ID_CODE.value)
    stop_port = constants.SDP_PORTS.RUNNING_COMMAND_SDP_PORT.value

    # check how many processors are already in CPU_STATE_13
    processors_cpu_state13 = txrx.get_core_state_count(
        app_id, CPUState.CPU_STATE_13)
    finished_cores = processors_cpu_state13

    while processors_cpu_state13 != total_processors:

        # advance the bar by the number of newly stopped cores (the
        # operands were previously swapped, giving a negative amount)
        if processors_cpu_state13 > finished_cores:
            progress_bar.update(processors_cpu_state13 - finished_cores)
            finished_cores = processors_cpu_state13

        processors_rte = txrx.get_core_state_count(
            app_id, CPUState.RUN_TIME_EXCEPTION)
        processors_watchdogged = txrx.get_core_state_count(
            app_id, CPUState.WATCHDOG)

        if processors_rte > 0 or processors_watchdogged > 0:
            fail_message = ""
            if processors_rte > 0:
                rte_cores = helpful_functions.get_cores_in_state(
                    all_core_subsets, CPUState.RUN_TIME_EXCEPTION, txrx)
                fail_message += helpful_functions.get_core_status_string(
                    rte_cores)
            if processors_watchdogged > 0:
                watchdog_cores = helpful_functions.get_cores_in_state(
                    all_core_subsets, CPUState.WATCHDOG, txrx)
                fail_message += helpful_functions.get_core_status_string(
                    watchdog_cores)
            raise exceptions.ExecutableFailedToStopException(
                "{} of {} processors went into an error state when"
                " shutting down: {}".format(
                    processors_rte + processors_watchdogged,
                    total_processors, fail_message))

        successful_cores_cpu_state13 = set(
            helpful_functions.get_cores_in_state(
                all_core_subsets, CPUState.CPU_STATE_13, txrx))

        all_cores = set(all_core_subsets)
        unsuccessful_cores = all_cores - successful_cores_cpu_state13

        # (re)send the stop command to every core not yet seen as stopped
        for core_subset in unsuccessful_cores:
            for processor in core_subset.processor_ids:
                txrx.send_sdp_message(
                    SDPMessage(
                        sdp_header=SDPHeader(
                            flags=SDPFlag.REPLY_NOT_EXPECTED,
                            destination_port=stop_port,
                            destination_cpu=processor,
                            destination_chip_x=core_subset.x,
                            destination_chip_y=core_subset.y,
                        ),
                        data=byte_data,
                    )
                )

        processors_cpu_state13 = txrx.get_core_state_count(
            app_id, CPUState.CPU_STATE_13)

    txrx.send_signal(app_id, sync_state)

    progress_bar.end()