def set_powerplay_table(name: str, table: PowerPlayTable):
    """
    Updates and commits a PowerPlay table from a list of clock/voltage tuples.

    P-states are appended to the PowerPlay file one by one, in table order.
    Global config limits are enforced: as soon as a p-state exceeds the
    configured clock or voltage limit, 'r' is appended to the file and a
    ValueError is raised. P-states written before the offending one stay in
    the file and nothing is committed.

    :param name: table name; used as a key into TABLES and as the prefix of
        the '<name>.clock.limit' and '<name>.voltage.limit' config entries
    :param table: sequence of (clock, voltage) pairs; the position of a pair
        in the sequence is its p-state level
    :raises ValueError: if a clock or a voltage is above its configured limit
    """
    powerplay_filepath = Config.get('files.powerplay')
    prefix = TABLES[name][1]
    clock_limit = Config.get('{}.clock.limit'.format(name))
    voltage_limit = Config.get('{}.voltage.limit'.format(name))

    for level, (clock, voltage) in enumerate(table):
        # apply limits; exceptions do not %-interpolate their arguments the
        # way logging calls do, so the message is formatted explicitly
        if clock > clock_limit:
            # 'r' is presumably the driver's "reset" command -- confirm
            # against the amdgpu pp_od_clk_voltage documentation
            sudo_tee_a(powerplay_filepath, 'r')
            raise ValueError(
                'ignored p-state over the {} clock limit {} MHz > {} MHz'.format(
                    name, clock, clock_limit))
        if voltage > voltage_limit:
            sudo_tee_a(powerplay_filepath, 'r')
            raise ValueError(
                'ignored p-state over the {} voltage limit {} mV > {} mV'.format(
                    name, voltage, voltage_limit))

        # update the p-state
        raw_pstate = '{prefix} {level:d} {clock:d} {voltage:d}'.format(
            prefix=prefix, level=level, clock=clock, voltage=voltage)
        sudo_tee_a(powerplay_filepath, raw_pstate)

    # commit (really apply changes to the card)
    sudo_tee_a(powerplay_filepath, 'c')
def test_persist(self, time):
    """
    Checks that persist() writes one datapoint line per metric into the
    configured output directory.

    NOTE(review): *time* is presumably a mock injected by a patch decorator
    that is not visible here; the expected file name and timestamps
    (123456123456) depend on it -- confirm.
    """
    with tempfile.TemporaryDirectory() as outputdir:
        Config.update({'metrics': {'outputdir': outputdir}})
        persist([get_metric('a.b.c', '1'), get_metric('e', '2')])

        expected_path = os.path.join(outputdir, 'vatu-123456123456.metrics')
        with open(expected_path) as handle:
            written = handle.readlines()

        self.assertEqual(written[0], '123456123456// a.b.c{} 1\n')
        self.assertEqual(written[1], '123456123456// e{} 2\n')
def set_power_limit(limit: int):
    """
    Set the card's power cap.

    The requested limit is converted to an integer once, checked against the
    'card.power.limit' config entry and, if accepted, written to the file
    configured under 'files.power' multiplied by 10**6 (presumably
    microwatts for a limit given in watts -- confirm).

    :param limit: requested power limit; anything int() accepts
    :raises ValueError: if the requested limit is above the config limit
    """
    # convert once so the value that is checked is also the value that is
    # written (multiplying a str by 10**6 would repeat it instead of scaling)
    requested = int(limit)

    power_limit = Config.get('card.power.limit')
    if requested > power_limit:
        # exceptions do not %-interpolate their arguments, format explicitly
        raise ValueError(
            "requested power limit {} is higher than the config limit {}".format(
                requested, power_limit))

    power_filepath = Config.get('files.power')
    sudo_tee(power_filepath, requested * 10**6)
def sense(self):
    """
    Takes a fresh State snapshot, logs it and appends it to the history.

    The snapshot's metrics are persisted only when 'metrics.enabled' is set
    in the config.
    """
    snapshot = State()
    self.current_state = snapshot
    logging.info(snapshot)
    self.states.append(snapshot)

    if not Config.get('metrics.enabled'):
        return
    metrics.persist(snapshot.metrics())
def test_set_power_limit(self):
    """
    Exercises set_power_limit() against a temporary power file.

    Three scenarios run in sequence on the same file: a plain write, a write
    attempted in read-only mode, and a write above the configured limit.
    After the last two, get_power_limit() must still report the first value.
    """
    with tempfile.NamedTemporaryFile('w') as f:
        # point the 'power' file entry at the temporary file and allow writes
        Config.update({'files': {'power': f.name}})
        Config.update({'readonly': False})
        set_power_limit(100)
        self.assertEqual(get_power_limit(), 100)
        # make sure we honor read-only mode
        Config.update({'readonly': True})
        set_power_limit(99)
        # the previously written limit must still be the one read back
        self.assertEqual(get_power_limit(), 100)
        # make sure we honor the power limit
        Config.update({'readonly': False})
        Config.update({'card': {'power': {'limit': 150}}})
        with self.assertRaises(ValueError):
            set_power_limit(200)
        # the rejected request must not have changed the stored limit
        self.assertEqual(get_power_limit(), 100)
def think(self) -> List[Action]:
    """
    Decides the next tuning actions from the current state.

    Actions returned by the parent class take precedence. Otherwise one of
    three adjustments is returned: raise the core clock, lower the clock and
    raise the power limit, or lower the clock and raise the core voltage.
    """
    inherited = super().think()
    if inherited:
        return inherited

    state = self.current_state

    # everything good, raise core clock
    if state.core_plevel >= 5:
        raised_clock = state.core_pptable[-1][0] + Config.get('core.clock.step')
        logging.info(
            "p-level stable and high enough, raising core clock to %sMHz",
            raised_clock)
        return [SetCoreClock(raised_clock)]

    # reaching power limit at a lower than p5/6/7 pstate, lower clock and raise power limit
    headroom = state.gpu_power_limit - state.gpu_power_usage
    if headroom < 20:
        lowered_clock = state.core_pptable[-1][0] - 2 * Config.get('core.clock.step')
        raised_power = state.gpu_power_limit + int(Config.get('card.power.step'))
        logging.info(
            "p-level stable but too low, close to power limit, raising power limit to %sW",
            raised_power)
        return [SetCoreClock(lowered_clock), SetPowerLimit(raised_power)]

    # lack of core power, lower clock and raise core voltage
    lowered_clock = state.core_pptable[-1][0] - 2 * Config.get('core.clock.step')
    raised_voltage = state.core_pptable[-1][1] + int(Config.get('core.voltage.step'))
    logging.info(
        "p-level stable but too low, far from power limit, raising core voltage to %smV",
        raised_voltage)
    return [SetCoreClock(lowered_clock), SetCoreVoltage(raised_voltage)]
def sudo_tee_a(filepath, string):
    """
    Appends *string*, followed by a newline, to *filepath*.

    The equivalent shell command is always logged at debug level. When the
    'readonly' config entry is set, nothing is written.
    """
    logging.debug('echo %s | sudo tee -a %s', string, filepath)

    read_only = Config.get("readonly")
    if not read_only:
        # how much sanitizing can one really need
        with open(filepath, 'a') as handle:
            print(string, file=handle)
        return

    logging.debug(
        "read-only, we didn't actually append anything but feel free to execute that command as root"
    )
def persist(metrics: List[Metric]) -> str:
    """
    Writes metrics to disk as Sensision datapoints, one per line, so that
    they can be consumed by Beamium.

    The file is created in the 'metrics.outputdir' directory (created when
    missing) and is named after the current time. Each metric must be a
    mapping providing 'timestamp', 'name' and 'value'.

    :return: the name of the written file, without its directory
    """
    target_dir = Config.get('metrics.outputdir')
    os.makedirs(target_dir, exist_ok=True)

    stamp = int(round(time.time() * 1000000))
    filename = 'vatu-{}.metrics'.format(stamp)
    destination = os.path.join(target_dir, filename)

    with open(destination, 'w') as out:
        for entry in metrics:
            logging.debug(json.dumps(entry))
            line = '{timestamp}// {name}{{}} {value}'.format(**entry)
            out.write(line + '\n')
    return filename
def test_get_power_limit(self):
    """ get_power_limit() must report 123 for the 'power1_cap' fixture """
    fixture = get_filepath('power1_cap')
    Config.update({'files': {'power': fixture}})
    reported = get_power_limit()
    self.assertEqual(reported, 123)
def setUpClass(cls):
    """ Registers the 'amdgpu_pm_info' fixture as the gpuinfo file in Config """
    gpuinfo_fixture = get_filepath('amdgpu_pm_info')
    file_settings = {'gpuinfo': gpuinfo_fixture}
    Config.update({'files': file_settings})
def read_raw_powerplay_tables() -> List[str]:
    """
    Reads the PowerPlay clock/voltage file *pp_od_clk_voltage* and returns
    its lines stripped of surrounding whitespace
    """
    lines = sudo_cat(Config.get('files.powerplay'))
    return [str.strip(line) for line in lines]
def get_raw_gpu_infos() -> List[str]:
    """
    Returns the lines of amdgpu_pm_info, each stripped of surrounding
    whitespace
    """
    lines = sudo_cat(Config.get('files.gpuinfo'))
    return [str.strip(line) for line in lines]
def gpu_temperature_is_too_high(self) -> bool:
    """
    Tells whether the current GPU temperature has reached or exceeded the
    'card.temperature.limit' config entry
    """
    temperature = self.current_state.gpu_temperature
    ceiling = Config.get('card.temperature.limit')
    return temperature >= ceiling
def gpu_load_too_low(self) -> bool:
    """
    Tells whether the current GPU load is strictly below the
    'card.load.minimum' config entry
    """
    load = self.current_state.gpu_load
    floor = Config.get('card.load.minimum')
    return load < floor
def get_power_limit() -> int:
    """
    Reads the card's power cap: the first line of the 'files.power' file,
    divided by 10**6 and truncated to an integer
    """
    first_line = sudo_cat(Config.get('files.power'))[0]
    raw_cap = int(first_line)
    return int(raw_cap / 10**6)
def test_get_memory_plevel(self):
    """ get_memory_plevel() must report level 3 for the 'pp_dpm_mclk' fixture """
    fixture = get_filepath('pp_dpm_mclk')
    Config.update({'files': {'memory': fixture}})
    self.assertEqual(get_memory_plevel(), 3)
def get_core_plevel() -> int:
    """
    Returns the current core PowerPlay level, i.e. the first element of the
    active p-state read from the 'files.core' file
    """
    active = get_active_pstate(Config.get('files.core'))
    return active[0]
def test_set_powerplay_table(self):
    """
    Exercises set_powerplay_table() against temporary PowerPlay files.

    Four scenarios, each on its own temporary file: a table within limits,
    a table breaking the clock limit, a table breaking the voltage limit,
    and a write attempted in read-only mode. Each scenario checks the exact
    lines found in the file afterwards.
    """
    with tempfile.NamedTemporaryFile('w+') as f:
        Config.update({'files': {'powerplay': f.name}})
        Config.update({'readonly': False})
        set_powerplay_table('core', [(1200, 800), (1400, 900)])
        # every p-state is written, followed by the commit line
        self.assertEqual(f.readlines(),
                         ['s 0 1200 800\n', 's 1 1400 900\n', 'c\n'])
    # make sure we honor the clock limit
    with tempfile.NamedTemporaryFile('w+') as f:
        Config.update({'files': {'powerplay': f.name}})
        Config.update({'core': {'clock': {'limit': 1650}}})
        Config.update({'core': {'voltage': {'limit': 1300}}})
        with self.assertRaises(ValueError):
            set_powerplay_table('core',
                                [(1500, 1050), (1650, 1050), (1700, 1200)])
        # the first two p-states are within limits; the third (1700 > 1650)
        # is replaced by an 'r' line and nothing is committed
        self.assertEqual(f.readlines(),
                         ['s 0 1500 1050\n', 's 1 1650 1050\n', 'r\n'])
    # make sure we honor the voltage limit
    with tempfile.NamedTemporaryFile('w+') as f:
        Config.update({'files': {'powerplay': f.name}})
        Config.update({'core': {'clock': {'limit': 1800}}})
        Config.update({'core': {'voltage': {'limit': 1100}}})
        with self.assertRaises(ValueError):
            set_powerplay_table('core',
                                [(1600, 1050), (1650, 1100), (1700, 1200)])
        # same expectation, this time triggered by the voltage (1200 > 1100)
        self.assertEqual(f.readlines(),
                         ['s 0 1600 1050\n', 's 1 1650 1100\n', 'r\n'])
    # make sure we honor read-only mode
    with tempfile.NamedTemporaryFile('w+') as f:
        Config.update({'files': {'powerplay': f.name}})
        Config.update({'readonly': True})
        set_powerplay_table('core', [(1200, 800), (1400, 900)])
        # nothing may reach the file in read-only mode
        self.assertEqual(f.readlines(), [])
def test_get_powerplay_table(self):
    """
    get_powerplay_table('memory') must return the four expected
    (clock, voltage) pairs for the 'pp_od_clk_voltage' fixture
    """
    fixture = get_filepath('pp_od_clk_voltage')
    Config.update({'files': {'powerplay': fixture}})
    expected = [(167, 800), (500, 800), (800, 950), (1234, 1050)]
    self.assertEqual(get_powerplay_table('memory'), expected)
def get_memory_plevel() -> int:
    """
    Returns the current memory PowerPlay level, i.e. the first element of
    the active p-state read from the 'files.memory' file
    """
    active = get_active_pstate(Config.get('files.memory'))
    return active[0]
def test_get_core_plevel(self):
    """ get_core_plevel() must report level 0 for the 'pp_dpm_sclk' fixture """
    fixture = get_filepath('pp_dpm_sclk')
    Config.update({'files': {'core': fixture}})
    self.assertEqual(get_core_plevel(), 0)
def cli(verbose, config):
    """ Vega Auto Tuner """
    # NOTE(review): the docstring above is left untouched on purpose; it may
    # be surfaced as command-line help text by a decorator not visible here.
    # Logging is configured first; a config is loaded only when one is given.
    setup_logging(verbose)
    if not config:
        return
    Config.load(config)