def __init__(self, writer, stop_reward, telem=False, plot=False):
    """Initialise reward bookkeeping, optional plotting/telemetry and the CSV log.

    Args:
        writer: Stored as ``self.writer``; not otherwise used in this method.
        stop_reward: Stored as ``self.stop_reward``; not otherwise used in
            this method.
        telem: When truthy, an ``ApplicationTelemetry`` client is created as
            ``self.tm`` and the log file is ``/results/output.txt``. Otherwise
            the log file lives in a ``results`` directory resolved relative
            to this source file.
        plot: When truthy, a two-row figure is created through ``plt``
            (presumably ``matplotlib.pyplot`` -- confirm against the file's
            imports) and interactive mode is switched on.

    Side effects:
        Creates the results directory if needed and (re)creates the output
        file containing only the CSV header row.
    """
    self.writer = writer
    self.stop_reward = stop_reward

    # Histories start out empty.
    self.rewards = np.array([])
    self.scores = np.array([])
    self.mean_scores = np.array([])
    self.count = 0

    if plot:
        # Only the two axes are kept; the figure handle itself is not needed.
        _, (self.ax1, self.ax2) = plt.subplots(2, 1)
        plt.ion()
        plt.show()

    self.telemetry = telem
    if telem:
        self.tm = telemetry.ApplicationTelemetry()
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs('/results', exist_ok=True)
        self.outfile = '/results/output.txt'
    else:
        source_path = os.path.abspath(__file__)
        # The join starts from the file path itself, so the first ".."
        # consumes the file name: the target is a "results" directory three
        # levels above the directory containing this file.
        results_dir = os.path.abspath(
            os.path.join(source_path, '../../../../results'))
        os.makedirs(results_dir, exist_ok=True)
        self.outfile = os.path.join(results_dir, 'output.txt')

    self.fieldnames = ['frames', 'games', 'mean reward', 'mean score',
                       'max score']
    # Mode 'w' truncates any previous log; only the header row is written here.
    with open(self.outfile, 'w', newline='') as csvfile:
        csv_writer = csv.DictWriter(csvfile, fieldnames=self.fieldnames)
        csv_writer.writeheader()
def setup_monitoring(self, monitoring, monitoring_dir=None):
    """Select and initialise the monitoring backend.

    The requested backend name and directory are always recorded on the
    instance first, even when the name turns out to be unsupported.

    Args:
        monitoring: ``'telemetry'`` or ``'tensorboard'``.
        monitoring_dir: Passed to ``tensorboardX.SummaryWriter`` for the
            ``'tensorboard'`` backend; only stored for the other one.

    Raises:
        NotImplementedError: If ``monitoring`` names any other tool.
    """
    self.monitoring, self.monitoring_dir = monitoring, monitoring_dir

    if monitoring == 'telemetry':
        # Imported lazily so the package is only needed when selected.
        import telemetry
        self.tm = telemetry.ApplicationTelemetry()
        status = self.tm.get_status()
        if status == 0:
            print('Telemetry successfully connected.')
        return

    if monitoring == 'tensorboard':
        # Imported lazily so the package is only needed when selected.
        import tensorboardX
        self.tb = tensorboardX.SummaryWriter(monitoring_dir)
        return

    raise NotImplementedError(
        'Monitoring tool "%s" not supported!' % monitoring)
import subprocess try: import telemetry ngc_telemetry = telemetry.ApplicationTelemetry() except: ngc_telemetry = False print("Could not load NGC telemetry!") def push_ngc_telemetry(name, value): # if NGC telemetry logging enabled: try: ngc_telemetry.metric_push_async({'metric': name, 'value': value}) except: pass def log_ngc(train_metric): for key in train_metric.keys(): push_ngc_telemetry(key, train_metric[key]) def get_gpu_memory_map(): """Get the current gpu usage. Returns ------- usage: dict Keys are device ids as integers. Values are memory usage as integers in MB.