def push(td_client, logline: str, logfile_year, rindex):
    """Push the processed metrics data to the metrics service"""
    del logfile_year  # Ignored parameter for now
    try:
        emr = json.loads(logline + "\n")

        etimes_dict = emr["etimes"]
        etimes: dict = dict()
        for key, value_any in etimes_dict.items():
            val = value_any["value"]
            if "type" in value_any:
                val_type = value_any["type"]
                etimes[key] = tdp.Any(type=val_type, value=str(val))
            else:
                etimes[key] = tdp.Any(value=str(val))

        values_dict = emr["values"]
        scalars: dict = dict()
        for key, value_any in values_dict.items():
            val = value_any["value"]
            if "type" in value_any:
                val_type = value_any["type"]
                scalars[key] = tdp.Any(type=val_type, value=val)
            else:
                scalars[key] = tdp.Any(value=val)

        emr_meta = emr["meta"]
        training_id = emr_meta["training_id"]
        if training_id is None and "TRAINING_ID" in os.environ:
            training_id = os.environ["TRAINING_ID"]

        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(
                training_id=training_id,
                time=int(emr_meta["time"]),
                rindex=int(emr_meta["rindex"]),
            ),
            grouplabel=emr["grouplabel"],
            etimes=etimes,
            values=scalars,
        )

        if td_client is not None:
            td_client.AddEMetrics(emetrics)

        # for now, print to stdout (support old endpoint).
        # TODO: Don't print to stdout for metrics
        json_form = print_json.to_string(emetrics)
        print(json_form)

    except Exception as inst:
        print("Unexpected error when attempting to process evaluation metric record:",
              sys.exc_info()[0])
        print(inst)
        sys.stdout.flush()

    return rindex + 1
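# For reference, a minimal sketch of the JSON record shape that push() above expects
# on each log line (all field values here are invented, not taken from any real log):
# "etimes" and "values" map metric names to {"value": ..., "type": ...} dicts, and
# "meta" carries the training identifiers.
EXAMPLE_EMETRICS_RECORD = (
    '{"meta": {"training_id": "training-abc123", "time": 1524868800000, "rindex": 1}, '
    '"grouplabel": "test", '
    '"etimes": {"iteration": {"value": "100", "type": "INT"}}, '
    '"values": {"loss": {"value": "0.25", "type": "FLOAT"}}}')

# Usage sketch: rindex = push(td_client, EXAMPLE_EMETRICS_RECORD, None, rindex)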
def emitEvalMetric(td_client: tdb.TrainingDataClientBuffered,
                   group_label: str,  # group label, likely test or train
                   iterStep,
                   timestamp,
                   values_dict,
                   rindex: int,
                   event_wall_time: float,
                   sub_identifier):
    """Push the processed metrics data to the metrics service"""
    try:
        etimes = dict()
        etimes['iteration'] = tdp.Any(type=tdp.Any.INT, value=str(iterStep))
        etimes['timestamp'] = tdp.Any(type=tdp.Any.STRING, value=timestamp)
        etimes['wall_time'] = tdp.Any(type=tdp.Any.FLOAT, value=str(event_wall_time))

        # # d = datetime.datetime.utcnow()   # <-- get time in UTC
        # # print d.isoformat("T") + "Z"
        # # if timestamp == None:
        # #     timestamp = start_time + datetime.timedelta(seconds=rowdict['Seconds'])
        # # dict_MetricData['timestamp'] = timestamp
        # time=int(event_wall_time * 1000),

        etb_logger.debug("Creating emetrics record")
        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(training_id=os.environ["TRAINING_ID"],
                              time=extract_datetime.get_meta_timestamp(),
                              rindex=int(rindex),
                              subid=sub_identifier),
            grouplabel=group_label,
            etimes=etimes,
            values=values_dict)

        if td_client is not None:
            etb_logger.debug("Calling AddEMetrics")
            td_client.AddEMetrics(emetrics)

    except Exception as inst:
        etb_logger.error("Unexpected error when attempting to send emetrics: %s",
                         sys.exc_info()[0])
        print("Unexpected error when attempting to send emetrics:", sys.exc_info()[0])
        print(type(inst))
        print(inst.args)
        traceback.print_exc()
        print(inst)
        sys.stdout.flush()

    return rindex + 1
def emitEvalMetric(em_file_path: str, log_dir, td_client, group_label, iterStep,
                   timestamp, values_dict, rindex, eventWallTime):
    """Push the processed metrics data to the metrics service"""
    try:
        etimes = dict()
        etimes['iteration'] = tdp.Any(type=tdp.Any.INT, value=str(iterStep))
        etimes['timestamp'] = tdp.Any(type=tdp.Any.STRING, value=timestamp)

        # # d = datetime.datetime.utcnow()   # <-- get time in UTC
        # # print d.isoformat("T") + "Z"
        # # if timestamp == None:
        # #     timestamp = start_time + datetime.timedelta(seconds=rowdict['Seconds'])
        # # dict_MetricData['timestamp'] = timestamp

        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(
                training_id=os.environ["TRAINING_ID"],
                time=int(eventWallTime),
                rindex=int(rindex)
            ),
            grouplabel=group_label,
            etimes=etimes,
            values=values_dict
        )

        if td_client is not None:
            td_client.AddEMetrics(emetrics)

        json_form = print_json.to_string(emetrics)
        with open(em_file_path, 'a') as em_stream:
            em_stream.write(json_form)
            em_stream.write("\n")

        # for now, print to stdout.
        # TODO: Don't print to stdout for metrics
        print(json_form)

    except Exception as inst:
        print("Unexpected error when attempting to send emetrics:", sys.exc_info()[0])
        print(type(inst))
        print(inst.args)
        traceback.print_exc()
        print(inst)
        sys.stdout.flush()

    return rindex + 1
def report(self, rindex: int) -> int:
    if self.is_queued_for_report():
        values_dict = {}
        event_list = self.step_to_event_list_dict[self.step_added]
        rfc3339_formatted = None
        event_wall_time = 0.0
        # Transmogrify the ScalarEvent list into a values dictionary
        for event_set in event_list:
            label = event_set.label
            event = event_set.event  # type: event_accumulator.ScalarEvent
            if event_wall_time == 0.0:
                event_wall_time = float(event.wall_time)
            if rfc3339_formatted is None:
                dt = datetime.datetime.fromtimestamp(event.wall_time)
                rfc3339_formatted = dt.isoformat("T") + "Z"
            values_dict[label] = tdp.Any(type=tdp.Any.FLOAT, value=str(event.value))

        # etb_logger.debug("Calling emitEvalMetric step=%r: rindex=%r",
        #                  self.step_added, rindex)
        emitEvalMetric(td_client=self.td_client,
                       group_label=self.group,
                       iterStep=self.step_added,
                       timestamp=rfc3339_formatted,
                       values_dict=values_dict,
                       rindex=rindex,
                       event_wall_time=event_wall_time,
                       sub_identifier=self.sub_identifier)
        rindex += 1
        self.step_added = -1
        self.wall_time = -1.0
    return rindex
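# A quick note on the timestamp formatting used in report() above:
# datetime.fromtimestamp() yields *local* time, so the appended "Z" (the UTC
# designator) is only strictly correct when the host clock runs in UTC.
# A minimal illustration (the wall_time value is invented):
import datetime

dt = datetime.datetime.fromtimestamp(1524868800.0)  # wall_time from a ScalarEvent
print(dt.isoformat("T") + "Z")  # "2018-04-27T22:40:00Z" on a host running in UTC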
def emitEvalMetricViaTracker(em_file_path: str, tracker, iterStep, reportInterval,
                             remainderCompare, rindex):
    ea = tracker.ea
    ea.Reload()  # loads events from file

    # if emitIntermediateDiagnostics:
    #     emitEvalMetricMessage(tracker.log_dir, iterStep,
    #                           "skimming at iterStep #"+str(iterStep))
    #
    # # rootScalerRecord = ea.Scalars(labelKeyList[0][1])[0]
    # # print(rootScalerRecord)

    tags = ea.Tags()
    scalar_keys = tags[event_accumulator.SCALARS]
    for scalar_key in scalar_keys:
        scalar_events = ea.Scalars(scalar_key)
        for event in scalar_events:
            label_event_list = tracker.iterDict.get(str(event.step))
            if label_event_list is None:
                eventSet = EventSet()
                eventSet.label = scalar_key
                eventSet.event = event
                tracker.iterDict[str(event.step)] = [eventSet]
            else:
                if isInList(label_event_list, scalar_key, event.step):
                    # already been reported, continue
                    continue
                eventSet = EventSet()
                eventSet.label = scalar_key
                eventSet.event = event
                tracker.iterDict[str(event.step)].append(eventSet)

    latest_iter = iterStep
    for key in tracker.iterDict.keys():
        labelValueList = {}
        eventList = tracker.iterDict[key]
        foundReportable = False
        rfc3339_formatted = None
        eventWallTime = None
        for eventSet in eventList:
            if not eventSet.reported:
                foundReportable = True
                label = eventSet.label
                event = eventSet.event
                iterStep = event.step
                eventWallTime = event.wall_time
                if iterStep > latest_iter:
                    latest_iter = iterStep
                if rfc3339_formatted is None:
                    dt = datetime.datetime.fromtimestamp(event.wall_time)
                    rfc3339_formatted = dt.isoformat("T") + "Z"
                labelValueList[label] = tdp.Any(type=tdp.Any.FLOAT,
                                                value=str(event.value))
                eventSet.reported = True
        if reportInterval > 0 and (iterStep % reportInterval) != remainderCompare:
            foundReportable = False
        if foundReportable:
            emitEvalMetric(em_file_path, tracker.log_dir, tracker.tdClient,
                           tracker.group, iterStep, rfc3339_formatted,
                           labelValueList, rindex, eventWallTime)
            rindex += 1

    return latest_iter, rindex
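# emitEvalMetricViaTracker() depends on an EventSet record type and an isInList()
# helper defined elsewhere in this module; a minimal sketch of the assumed shapes
# (field names inferred from the usage above, not copied from the repo):
class EventSet:
    """One labelled TensorBoard scalar event, plus a reported flag."""
    def __init__(self):
        self.label = None      # the scalar tag, e.g. "loss"
        self.event = None      # an event_accumulator.ScalarEvent
        self.reported = False  # set True once emitted to the metrics service


def isInList(label_event_list, label, step):
    """Return True if an event with this label and step is already tracked."""
    return any(es.label == label and es.event.step == step
               for es in label_event_list)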
def extract_and_push_emetrics(td_client: tdb.TrainingDataClientBuffered,
                              log_file: str,
                              log_line: str,
                              logfile_year: str,
                              line_index: int,
                              record_index: int,
                              subdir: str,
                              extra_any: Any = None) -> Tuple[int, int]:
    state: ExtraPushData4RegExExtractor = extra_any

    # # Do our best to get a good start time.
    # if not state.did_get_good_time:
    #     # keep trying to get a good start time from the log line, until it's pointless
    #     start_time, did_get_good_time = \
    #         edt.extract_datetime(log_line, logfile_year, state.start_time)

    if td_client.em_file_path is None:
        em_file_path = os.path.join(os.path.dirname(log_file),
                                    match_log_file.EMETRICS_FILE_BASE_NAME)
        td_client.set_em_file_path(em_file_path)
        logging.debug("em_file_path: %s, subdir: %s", td_client.em_file_path, subdir)

    # logging.debug("push_log_line.push %d (subdir: %s) %s",
    #               line_index, subdir, log_line)
    line_index, _ = push_log_line.push(td_client, log_file, log_line, logfile_year,
                                       line_index, 0, subdir, state)

    state.line_length_stack.append(len(log_line))
    state.text_window += log_line
    if len(state.line_length_stack) > state.line_lookahead:
        length_first_line = state.line_length_stack[0]
        state.line_length_stack = state.line_length_stack[1:]
        state.text_window = state.text_window[length_first_line:]

    logging.debug("len groups: %d", len(state.groups))
    for group_key in state.groups:
        group = state.groups[group_key]
        name = group_key
        regex_expanded = group["regex_expanded"]
        matches = regex_expanded.match(state.text_window)
        if matches is not None:
            values_dict = matches.groupdict()
            # meta_dict_desc = group["meta"]

            etimes_descriptions: dict = group["etimes"]
            if etimes_descriptions is None:
                logging.warning("Did not find etimes! Found: ")
                for axis_key in group:
                    print("key: " + axis_key)
                sys.stdout.flush()
                break
            etimes: dict = dict()
            for etime_key in etimes_descriptions:
                item = etimes_descriptions[etime_key]
                valOrRef: str = item["value"]
                if valOrRef.startswith("$"):
                    value_inner = valOrRef[1:]
                    value_actual = values_dict[value_inner]
                else:
                    value_actual = valOrRef
                grpc_value_type = type_string_to_grpc_type(item["type"])
                etimes[etime_key] = tdp.Any(type=grpc_value_type, value=value_actual)

            if "scalars" in group:
                scalars_descriptions: dict = group["scalars"]
            elif "values" in group:
                scalars_descriptions: dict = group["values"]
            else:
                scalars_descriptions = None
            if scalars_descriptions is None:
                logging.warning("Did not find scalars! Found: ")
                for axis_key in group:
                    print("key: " + axis_key)
                sys.stdout.flush()
                break
            scalars: dict = dict()
            for scalar_key in scalars_descriptions:
                item = scalars_descriptions[scalar_key]
                valOrRef: str = item["value"]
                if valOrRef.startswith("$"):
                    value_inner = valOrRef[1:]
                    value_actual = values_dict[value_inner]
                else:
                    value_actual = valOrRef
                value_type = item["type"]
                grpc_value_type = type_string_to_grpc_type(value_type)
                scalars[scalar_key] = tdp.Any(type=grpc_value_type, value=value_actual)

            # date_string: str = line
            subid_value = subdir
            if "meta" in group:
                meta_list: dict = group["meta"]
                # if "time" in meta_list:
                #     valOrRef: str = meta_list["time"]
                #     if valOrRef.startswith("$"):
                #         value_ref = valOrRef[1:]
                #         # date_string = values_dict[value_ref]
                #     else:
                #         # date_string = valOrRef
                #         pass
                if "subid" in meta_list:
                    valOrRef: str = meta_list["subid"]
                    if valOrRef.startswith("$"):
                        logging.debug("resetting subdir(subid): %s, metalist: %r",
                                      subid_value, meta_list)
                        value_ref = valOrRef[1:]
                        subid_value = values_dict[value_ref]
                    elif valOrRef != "":
                        logging.debug("resetting subdir(subid): %s, metalist: %r",
                                      subid_value, meta_list)
                        subid_value = valOrRef

            logging.debug("about to push evaluation metrics with subdir(subid): %s",
                          subid_value)

            # At this point, don't keep trying to get a start time if we haven't already
            state.did_get_good_time = True

            emetrics = tdp.EMetrics(
                meta=tdp.MetaInfo(training_id=os.environ["TRAINING_ID"],
                                  time=edt.get_meta_timestamp(),
                                  rindex=record_index,
                                  subid=subid_value),
                grouplabel=name,
                etimes=etimes,
                values=scalars)
            record_index += 1
            # state.total_lines_pushed += 1

            if td_client is not None:
                td_client.AddEMetrics(emetrics)
                # for now, print to stdout (support old endpoint).
                # print(json_form)

            state.text_window = ""
            state.line_length_stack = []
            break

    return line_index, record_index
def extract(em_file_path: str, manifest: str, follow: bool, should_connect: bool = True):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    symbol_dict: Dict[str, str] = read_symbol_libs(dir_path)

    evaluation_metrics_spec = read_extract_description(manifest, symbol_dict)

    logfile = evaluation_metrics_spec["in"]
    job_directory = os.environ["JOB_STATE_DIR"]
    regex = r"\$JOB_STATE_DIR"
    logfile = re.sub(regex, job_directory, logfile, 0)

    # Not sure why I seem to lose the underscore somewhere along the line.
    if "line_lookahead" in evaluation_metrics_spec:
        line_lookahead: int = int(evaluation_metrics_spec["line_lookahead"])
    elif "linelookahead" in evaluation_metrics_spec:
        line_lookahead: int = int(evaluation_metrics_spec["linelookahead"])
    else:
        line_lookahead: int = 4

    groups: dict = evaluation_metrics_spec["groups"]

    line_length_stack: List[int] = []
    text_window = ""
    record_index = 0
    read_pos = 0
    line_index = 1
    learner_job_is_running = True
    logfile_year = None
    start_time = None  # becomes a datetime.datetime once one is extracted
    did_get_good_time: bool = False

    if should_connect:
        tdClient = connect.get_connection()
    else:
        tdClient = None

    while learner_job_is_running:
        if os.path.exists(logfile):
            if logfile_year is None:
                logfile_year = extract_datetime.get_log_created_year(logfile)
            with open(logfile, 'r') as log_stream:
                log_stream.seek(read_pos)
                try:
                    for line in log_stream:
                        # Do our best to get a good start time.
                        if not did_get_good_time:
                            # keep trying to get a good start time from the log line,
                            # until it's pointless
                            start_time, did_get_good_time = \
                                extract_datetime.extract_datetime(line, logfile_year,
                                                                  start_time)

                        line_index = push_log_line.push(tdClient, line, logfile_year,
                                                        line_index)

                        line_length_stack.append(len(line))
                        text_window += line
                        if len(line_length_stack) > line_lookahead:
                            length_first_line = line_length_stack[0]
                            line_length_stack = line_length_stack[1:]
                            text_window = text_window[length_first_line:]

                        for group_key in groups:
                            group = groups[group_key]
                            name = group_key
                            regex_expanded = group["regex_expanded"]
                            matches = regex_expanded.match(text_window)
                            if matches is not None:
                                values_dict = matches.groupdict()
                                # meta_dict_desc = group["meta"]

                                etimes_descriptions: dict = group["etimes"]
                                if etimes_descriptions is None:
                                    print("Did not find etimes! Found: ")
                                    for axis_key in group:
                                        print("key: " + axis_key)
                                    sys.stdout.flush()
                                    break
                                etimes: dict = dict()
                                for etime_key in etimes_descriptions:
                                    item = etimes_descriptions[etime_key]
                                    valOrRef: str = item["value"]
                                    if valOrRef.startswith("$"):
                                        value_inner = valOrRef[1:]
                                        value_actual = values_dict[value_inner]
                                    else:
                                        value_actual = valOrRef
                                    grpc_value_type = type_string_to_grpc_type(item["type"])
                                    etimes[etime_key] = tdp.Any(type=grpc_value_type,
                                                                value=value_actual)

                                if "scalars" in group:
                                    scalars_descriptions: dict = group["scalars"]
                                elif "values" in group:
                                    scalars_descriptions: dict = group["values"]
                                else:
                                    scalars_descriptions = None
                                if scalars_descriptions is None:
                                    print("Did not find scalars! Found: ")
                                    for axis_key in group:
                                        print("key: " + axis_key)
                                    sys.stdout.flush()
                                    break
                                scalars: dict = dict()
                                for scalar_key in scalars_descriptions:
                                    item = scalars_descriptions[scalar_key]
                                    valOrRef: str = item["value"]
                                    if valOrRef.startswith("$"):
                                        value_inner = valOrRef[1:]
                                        value_actual = values_dict[value_inner]
                                    else:
                                        value_actual = valOrRef
                                    value_type = item["type"]
                                    grpc_value_type = type_string_to_grpc_type(value_type)
                                    scalars[scalar_key] = tdp.Any(type=grpc_value_type,
                                                                  value=value_actual)

                                date_string: str = line
                                if "meta" in group:
                                    meta_list: dict = group["meta"]
                                    if "time" in meta_list:
                                        valOrRef: str = meta_list["time"]
                                        if valOrRef.startswith("$"):
                                            value_ref = valOrRef[1:]
                                            date_string = values_dict[value_ref]
                                        else:
                                            date_string = valOrRef

                                # At this point, don't keep trying to get a start time
                                # if we haven't already
                                did_get_good_time = True

                                # TODO: pass in the type specified by the regex
                                line_time, _ = extract_datetime.extract_datetime(
                                    date_string, logfile_year, None)
                                # note: .microseconds is the fractional-second component
                                # of the timedelta, not the total elapsed time
                                microseconds = (line_time - start_time).microseconds
                                timestamp = int(microseconds)
                                record_index += 1

                                emetrics = tdp.EMetrics(
                                    meta=tdp.MetaInfo(
                                        training_id=os.environ["TRAINING_ID"],
                                        time=timestamp,
                                        rindex=record_index
                                    ),
                                    grouplabel=name,
                                    etimes=etimes,
                                    values=scalars
                                )

                                json_form = print_json.to_string(emetrics)
                                with open(em_file_path, 'a') as em_stream:
                                    em_stream.write(json_form)
                                    em_stream.write("\n")

                                if tdClient is not None:
                                    tdClient.AddEMetrics(emetrics)

                                # for now, print to stdout (support old endpoint).
                                # TODO: Don't print to stdout for metrics
                                print(json_form)

                                text_window = ""
                                line_length_stack = []
                                break
                except Exception as inst:
                    print("Unexpected error when attempting to process "
                          "evaluation metric record:", sys.exc_info()[0])
                    print(inst)
                    sys.stdout.flush()

                read_pos = log_stream.tell()

        learner_job_is_running = follow

        # wait a second before reading the file again
        # (unless you want to constantly check the logs for new content?)
        time.sleep(1)
def push(td_client: tdb.TrainingDataClientBuffered,
         log_file: str,
         log_line: str,
         logfile_year: str,
         rindex: int,
         rindex2: int,
         subdir: str,
         extra: Any = None) -> Tuple[int, int]:
    """Push the processed metrics data to the metrics service"""
    del log_file      # Ignored parameter for now
    del logfile_year  # Ignored parameter for now
    del extra         # Ignored parameter for now
    try:
        emr = json.loads(log_line + "\n")

        logging.debug("Processing etimes...")
        etimes_dict = emr["etimes"]
        etimes: dict = dict()
        for key, value_any in etimes_dict.items():
            val = value_any["value"]
            if "type" in value_any:
                val_type = value_any["type"]
                etimes[key] = tdp.Any(type=val_type, value=str(val))
            else:
                etimes[key] = tdp.Any(value=str(val))

        logging.debug("Processing values...")
        values_dict = emr["values"]
        scalars: dict = dict()
        for key, value_any in values_dict.items():
            val = value_any["value"]
            if "type" in value_any:
                val_type = value_any["type"]
                scalars[key] = tdp.Any(type=val_type, value=val)
            else:
                scalars[key] = tdp.Any(value=val)

        emr_meta = emr["meta"]

        logging.debug("Getting training_id...")
        training_id = emr_meta["training_id"]
        if training_id is None and "TRAINING_ID" in os.environ:
            training_id = os.environ["TRAINING_ID"]

        if "time" in emr_meta and emr_meta["time"] is not None:
            meta_time = emr_meta["time"]
        else:
            meta_time = edt.get_meta_timestamp()

        if "subid" in emr_meta and emr_meta["subid"] is not None:
            subid = emr_meta["subid"]
        else:
            subid = subdir

        logging.debug("Assembling record...")
        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(training_id=training_id,
                              time=meta_time,
                              rindex=int(emr_meta["rindex"]),
                              subid=subid),
            grouplabel=emr["grouplabel"],
            etimes=etimes,
            values=scalars,
        )

        if td_client is not None:
            if False:  # flip to True for local debugging of the assembled record
                print_json.logging_output(emetrics)
            td_client.AddEMetrics(emetrics)

    except Exception as inst:
        print("Unexpected error when attempting to process evaluation metric record:",
              sys.exc_info()[0])
        print(inst)
        sys.stdout.flush()

    return rindex + 1, rindex2
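# Unlike the first push() above, this variant tolerates records whose "meta" omits
# "time" and "subid". A sketch of such a record (all values invented for illustration):
SPARSE_EMETRICS_RECORD = (
    '{"meta": {"training_id": null, "rindex": 2}, '
    '"grouplabel": "train", '
    '"etimes": {"iteration": {"value": "200"}}, '
    '"values": {"loss": {"value": "0.21"}}}')
# Here push() falls back to edt.get_meta_timestamp() for time, the TRAINING_ID
# environment variable for training_id, and the subdir argument for subid.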