def should_do_shrinkage(iter, model_file, shrink_saturation_threshold,
                        get_raw_nnet_from_am=True):
    """Return True if the model should be shrunk on this iteration.

    Shrinkage is always applied on iteration 0; afterwards it is applied
    only when the average saturation reported by
    steps/nnet3/get_saturation.pl exceeds shrink_saturation_threshold.

    Args:
        iter: current training iteration (0-based).
        model_file: path to the model to inspect.
        shrink_saturation_threshold: saturation value above which we shrink.
        get_raw_nnet_from_am: if True, model_file is an acoustic model
            (inspected with nnet3-am-info); otherwise a raw nnet
            (nnet3-info).

    Raises:
        Exception: if the saturation could not be parsed from the
            get_saturation.pl output.
    """
    if iter == 0:
        return True

    if get_raw_nnet_from_am:
        output = common_lib.get_command_stdout(
            "nnet3-am-info {0} 2>/dev/null | "
            "steps/nnet3/get_saturation.pl".format(model_file))
    else:
        output = common_lib.get_command_stdout(
            "nnet3-info 2>/dev/null {0} | "
            "steps/nnet3/get_saturation.pl".format(model_file))
    output = output.strip().split("\n")
    try:
        # get_saturation.pl is expected to print a single number in [0, 1].
        assert len(output) == 1
        saturation = float(output[0])
        assert saturation >= 0 and saturation <= 1
    # Was a bare 'except:', which also swallows SystemExit and
    # KeyboardInterrupt; only parsing/validation errors belong here.
    except (AssertionError, ValueError, IndexError):
        raise Exception("Something went wrong, could not get "
                        "saturation from the output '{0}' of "
                        "get_saturation.pl on the info of "
                        "model {1}".format(output, model_file))
    return saturation > shrink_saturation_threshold
def should_do_shrinkage(iter, model_file, shrink_saturation_threshold,
                        get_raw_nnet_from_am=True):
    """Return True if the model should be shrunk on this iteration.

    Shrinkage is always applied on iteration 0; afterwards it is applied
    only when the average saturation reported by
    steps/nnet3/get_saturation.pl exceeds shrink_saturation_threshold.

    Args:
        iter: current training iteration (0-based).
        model_file: path to the model to inspect.
        shrink_saturation_threshold: saturation value above which we shrink.
        get_raw_nnet_from_am: if True, model_file is an acoustic model
            (inspected with nnet3-am-info); otherwise a raw nnet
            (nnet3-info).

    Raises:
        Exception: if the saturation could not be parsed from the
            get_saturation.pl output.
    """
    if iter == 0:
        return True

    if get_raw_nnet_from_am:
        output = common_lib.get_command_stdout(
            "nnet3-am-info {0} 2>/dev/null | "
            "steps/nnet3/get_saturation.pl".format(model_file))
    else:
        output = common_lib.get_command_stdout(
            "nnet3-info 2>/dev/null {0} | "
            "steps/nnet3/get_saturation.pl".format(model_file))
    output = output.strip().split("\n")
    try:
        # get_saturation.pl is expected to print a single number in [0, 1].
        assert len(output) == 1
        saturation = float(output[0])
        assert saturation >= 0 and saturation <= 1
    # Was a bare 'except:', which also swallows SystemExit and
    # KeyboardInterrupt; only parsing/validation errors belong here.
    except (AssertionError, ValueError, IndexError):
        raise Exception("Something went wrong, could not get "
                        "saturation from the output '{0}' of "
                        "get_saturation.pl on the info of "
                        "model {1}".format(output, model_file))
    return saturation > shrink_saturation_threshold
def parse_prob_logs(exp_dir, key='accuracy', output="output"):
    """Parse compute_prob_{train,valid}.*.log into per-iteration objectives.

    Args:
        exp_dir: experiment directory containing log/compute_prob_*.log.
        key: objective name to look for (e.g. 'accuracy',
            'log-probability').
        output: name of the output node whose objective is reported.

    Returns:
        A list of (iteration, train_objective, valid_objective) tuples,
        sorted by iteration, restricted to the iterations present in both
        the train and valid logs.

    Raises:
        KaldiLogParseException: if no matching lines, or no common
            iterations, are found.
    """
    train_prob_files = "%s/log/compute_prob_train.*.log" % (exp_dir)
    valid_prob_files = "%s/log/compute_prob_valid.*.log" % (exp_dir)
    train_prob_strings = common_lib.get_command_stdout(
        'grep -e {0} {1}'.format(key, train_prob_files))
    valid_prob_strings = common_lib.get_command_stdout(
        'grep -e {0} {1}'.format(key, valid_prob_files))

    # Example lines that must match:
    # LOG
    # (nnet3-chain-compute-prob:PrintTotalStats():nnet-chain-diagnostics.cc:149)
    # Overall log-probability for 'output' is -0.399395 + -0.013437 = -0.412832
    # per frame, over 20000 fra
    # LOG
    # (nnet3-chain-compute-prob:PrintTotalStats():nnet-chain-diagnostics.cc:144)
    # Overall log-probability for 'output' is -0.307255 per frame, over 20000
    # frames.
    # Raw strings: '\.' etc. in plain literals are invalid escape sequences.
    parse_regex = re.compile(
        r".*compute_prob_.*\.([0-9]+).log:LOG "
        r".nnet3.*compute-prob.*:PrintTotalStats..:"
        r"nnet.*diagnostics.cc:[0-9]+. Overall ([a-zA-Z\-]+) for "
        r"'{output}'.*is ([0-9.\-e]+) .*per frame".format(output=output))

    train_objf = {}
    valid_objf = {}

    for line in train_prob_strings.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            # groups = (iteration, objective-name, value)
            if groups[1] == key:
                train_objf[int(groups[0])] = groups[2]
    if not train_objf:
        raise KaldiLogParseException("Could not find any lines with {k} in "
                                     " {l}".format(k=key, l=train_prob_files))

    for line in valid_prob_strings.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            if groups[1] == key:
                valid_objf[int(groups[0])] = groups[2]
    if not valid_objf:
        raise KaldiLogParseException("Could not find any lines with {k} in "
                                     " {l}".format(k=key, l=valid_prob_files))

    # Only iterations reported in BOTH logs are returned.
    iters = list(set(valid_objf.keys()).intersection(train_objf.keys()))
    if not iters:
        # Fixed wording of the error message ("Could not any" -> "Could not
        # find any").
        raise KaldiLogParseException("Could not find any common iterations"
                                     " with key {k} in both {tl} and "
                                     "{vl}".format(k=key, tl=train_prob_files,
                                                   vl=valid_prob_files))
    iters.sort()
    return list(map(lambda x: (int(x), float(train_objf[x]),
                               float(valid_objf[x])), iters))
def parse_prob_logs(exp_dir, key='accuracy', output="output"):
    """Parse compute_prob_{train,valid}.*.log into per-iteration objectives.

    Args:
        exp_dir: experiment directory containing log/compute_prob_*.log.
        key: objective name to look for (e.g. 'accuracy',
            'log-probability').
        output: name of the output node whose objective is reported.

    Returns:
        A list of (iteration, train_objective, valid_objective) tuples,
        sorted by iteration, restricted to the iterations present in both
        the train and valid logs.

    Raises:
        KaldiLogParseException: if no matching lines, or no common
            iterations, are found.
    """
    train_prob_files = "%s/log/compute_prob_train.*.log" % (exp_dir)
    valid_prob_files = "%s/log/compute_prob_valid.*.log" % (exp_dir)
    train_prob_strings = common_lib.get_command_stdout(
        'grep -e {0} {1}'.format(key, train_prob_files))
    valid_prob_strings = common_lib.get_command_stdout(
        'grep -e {0} {1}'.format(key, valid_prob_files))

    # Example lines that must match:
    # LOG
    # (nnet3-chain-compute-prob:PrintTotalStats():nnet-chain-diagnostics.cc:149)
    # Overall log-probability for 'output' is -0.399395 + -0.013437 = -0.412832
    # per frame, over 20000 fra
    # LOG
    # (nnet3-chain-compute-prob:PrintTotalStats():nnet-chain-diagnostics.cc:144)
    # Overall log-probability for 'output' is -0.307255 per frame, over 20000
    # frames.
    # Raw strings: '\.' etc. in plain literals are invalid escape sequences.
    parse_regex = re.compile(
        r".*compute_prob_.*\.([0-9]+).log:LOG "
        r".nnet3.*compute-prob.*:PrintTotalStats..:"
        r"nnet.*diagnostics.cc:[0-9]+. Overall ([a-zA-Z\-]+) for "
        r"'{output}'.*is ([0-9.\-e]+) .*per frame".format(output=output))

    train_objf = {}
    valid_objf = {}

    for line in train_prob_strings.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            # groups = (iteration, objective-name, value)
            if groups[1] == key:
                train_objf[int(groups[0])] = groups[2]
    if not train_objf:
        raise KaldiLogParseException("Could not find any lines with {k} in "
                                     " {l}".format(k=key, l=train_prob_files))

    for line in valid_prob_strings.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            if groups[1] == key:
                valid_objf[int(groups[0])] = groups[2]
    if not valid_objf:
        raise KaldiLogParseException("Could not find any lines with {k} in "
                                     " {l}".format(k=key, l=valid_prob_files))

    # Only iterations reported in BOTH logs are returned.
    iters = list(set(valid_objf.keys()).intersection(train_objf.keys()))
    if not iters:
        # Fixed wording of the error message ("Could not any" -> "Could not
        # find any").
        raise KaldiLogParseException("Could not find any common iterations"
                                     " with key {k} in both {tl} and "
                                     "{vl}".format(k=key, tl=train_prob_files,
                                                   vl=valid_prob_files))
    iters.sort()
    return list(map(lambda x: (int(x), float(train_objf[x]),
                               float(valid_objf[x])), iters))
def parse_progress_logs_for_nonlinearity_stats(exp_dir):
    """Parse progress.*.log for per-component non-linearity statistics.

    Collects, for every non-linear component and every iteration, the
    value-avg/deriv-avg (and, when present, oderiv-rms) percentile, mean
    and stddev figures printed by nnet3 in the progress logs.

    e.g. for a line that is parsed from progress.*.log:
    exp/nnet3/lstm_self_repair_ld5_sp/log/progress.9.log:component
    name=Lstm3_i type=SigmoidComponent, dim=1280, self-repair-scale=1e-05,
    count=1.96e+05, value-avg=[percentiles(0,1,2,5 10,20,50,80,90
    95,98,99,100)=(0.05,0.09,0.11,0.15 0.19,0.27,0.50,0.72,0.83
    0.88,0.92,0.94,0.99), mean=0.502, stddev=0.23],
    deriv-avg=[percentiles(0,1,2,5 10,20,50,80,90
    95,98,99,100)=(0.009,0.04,0.05,0.06 0.08,0.10,0.14,0.17,0.18
    0.19,0.20,0.20,0.21), mean=0.134, stddev=0.0397]

    Returns:
        A dict filled by fill_nonlin_stats_table_with_regex_result(),
        keyed per component and per iteration.
    """
    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    stats_per_component_per_iter = {}

    # Newer logs also report oderiv-rms; probe for those lines first.
    progress_log_lines = common_lib.get_command_stdout(
        'grep -e "value-avg.*deriv-avg.*oderiv" {0}'.format(
            progress_log_files),
        require_zero_status=False,
    )

    if progress_log_lines:
        # cases with oderiv-rms
        parse_regex = re.compile(g_normal_nonlin_regex_pattern_with_oderiv)
    else:
        # cases with only value-avg and deriv-avg
        progress_log_lines = common_lib.get_command_stdout(
            'grep -e "value-avg.*deriv-avg" {0}'.format(progress_log_files),
            require_zero_status=False,
        )
        parse_regex = re.compile(g_normal_nonlin_regex_pattern)

    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            continue
        # groups = ('9', 'Lstm3_i', 'Sigmoid', '0.05...0.99', '0.502', '0.23',
        #           '0.009...0.21', '0.134', '0.0397')
        groups = mat_obj.groups()
        component_type = groups[2]
        if component_type == "LstmNonlinearity":
            # LstmNonlinearity bundles 5 internal non-linearities; re-parse
            # the line with the LSTM-specific pattern and record each one.
            parse_regex_lstmp = re.compile(g_lstmp_nonlin_regex_pattern)
            mat_obj = parse_regex_lstmp.search(line)
            groups = mat_obj.groups()
            assert len(groups) == 33
            for i in list(range(0, 5)):
                fill_nonlin_stats_table_with_regex_result(
                    groups, i, stats_per_component_per_iter)
        else:
            fill_nonlin_stats_table_with_regex_result(
                groups, 0, stats_per_component_per_iter)
    return stats_per_component_per_iter
def parse_progress_logs_for_nonlinearity_stats(exp_dir):
    """Parse progress.*.log for per-component non-linearity statistics.

    Collects, for every non-linear component and every iteration, the
    value-avg/deriv-avg (and, when present, oderiv-rms) percentile, mean
    and stddev figures printed by nnet3 in the progress logs.

    e.g. for a line that is parsed from progress.*.log:
    exp/nnet3/lstm_self_repair_ld5_sp/log/progress.9.log:component
    name=Lstm3_i type=SigmoidComponent, dim=1280, self-repair-scale=1e-05,
    count=1.96e+05, value-avg=[percentiles(0,1,2,5 10,20,50,80,90
    95,98,99,100)=(0.05,0.09,0.11,0.15 0.19,0.27,0.50,0.72,0.83
    0.88,0.92,0.94,0.99), mean=0.502, stddev=0.23],
    deriv-avg=[percentiles(0,1,2,5 10,20,50,80,90
    95,98,99,100)=(0.009,0.04,0.05,0.06 0.08,0.10,0.14,0.17,0.18
    0.19,0.20,0.20,0.21), mean=0.134, stddev=0.0397]

    Returns:
        A dict filled by fill_nonlin_stats_table_with_regex_result(),
        keyed per component and per iteration.
    """
    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    stats_per_component_per_iter = {}

    # Newer logs also report oderiv-rms; probe for those lines first.
    progress_log_lines = common_lib.get_command_stdout(
        'grep -e "value-avg.*deriv-avg.*oderiv" {0}'.format(progress_log_files),
        require_zero_status=False)

    if progress_log_lines:
        # cases with oderiv-rms
        parse_regex = re.compile(g_normal_nonlin_regex_pattern_with_oderiv)
    else:
        # cases with only value-avg and deriv-avg
        progress_log_lines = common_lib.get_command_stdout(
            'grep -e "value-avg.*deriv-avg" {0}'.format(progress_log_files),
            require_zero_status=False)
        parse_regex = re.compile(g_normal_nonlin_regex_pattern)

    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            continue
        # groups = ('9', 'Lstm3_i', 'Sigmoid', '0.05...0.99', '0.502', '0.23',
        #           '0.009...0.21', '0.134', '0.0397')
        groups = mat_obj.groups()
        component_type = groups[2]
        if component_type == 'LstmNonlinearity':
            # LstmNonlinearity bundles 5 internal non-linearities; re-parse
            # the line with the LSTM-specific pattern and record each one.
            parse_regex_lstmp = re.compile(g_lstmp_nonlin_regex_pattern)
            mat_obj = parse_regex_lstmp.search(line)
            groups = mat_obj.groups()
            assert len(groups) == 33
            for i in list(range(0,5)):
                fill_nonlin_stats_table_with_regex_result(
                    groups, i, stats_per_component_per_iter)
        else:
            fill_nonlin_stats_table_with_regex_result(
                groups, 0, stats_per_component_per_iter)
    return stats_per_component_per_iter
def get_outputs_list(model_file, get_raw_nnet_from_am=True):
    """Generates list of output-node-names used in nnet3 model configuration.

    It will normally return 'output'.
    """
    # Pick the appropriate info binary's pipeline, then run it once.
    if get_raw_nnet_from_am:
        cmd = ("nnet3-am-info --print-args=false {0} | "
               "grep -e 'output-node' | cut -f2 -d' ' | cut -f2 -d'=' "
               .format(model_file))
    else:
        cmd = ("nnet3-info --print-args=false {0} | "
               "grep -e 'output-node' | cut -f2 -d' ' | cut -f2 -d'=' "
               .format(model_file))
    stdout = common_lib.get_command_stdout(cmd)
    return stdout.split()
def get_outputs_list(model_file, get_raw_nnet_from_am=True):
    """Generates list of output-node-names used in nnet3 model configuration.

    It will normally return 'output'.

    Args:
        model_file: path to an .mdl (acoustic model) or raw nnet3 model.
        get_raw_nnet_from_am: if True inspect with nnet3-am-info,
            otherwise with nnet3-info.
    """
    if get_raw_nnet_from_am:
        # The pipeline keeps the second space-separated field of each
        # 'output-node' line and then the value after '='.
        outputs_list = common_lib.get_command_stdout(
            "nnet3-am-info --print-args=false {0} | "
            "grep -e 'output-node' | cut -f2 -d' ' | cut -f2 -d'=' ".format(model_file))
    else:
        outputs_list = common_lib.get_command_stdout(
            "nnet3-info --print-args=false {0} | "
            "grep -e 'output-node' | cut -f2 -d' ' | cut -f2 -d'=' ".format(model_file))

    return outputs_list.split()
def add_nnet_context_info(config_dir):
    """This will be removed when python script refactoring is done.

    Compiles {config_dir}/ref.config into ref.raw, reads the compiled
    model's contexts with nnet3-info, and writes model_left_context /
    model_right_context into {config_dir}/vars.
    """
    common_lib.execute_command("nnet3-init {0}/ref.config "
                               "{0}/ref.raw".format(config_dir))
    out = common_lib.get_command_stdout("nnet3-info {0}/ref.raw | "
                                        "head -4".format(config_dir))
    # out looks like this
    # left-context: 7
    # right-context: 0
    # num-parameters: 90543902
    # modulus: 1
    info = {}
    for line in out.split("\n"):
        parts = line.split(":")
        if len(parts) != 2:
            continue
        info[parts[0].strip()] = int(parts[1].strip())

    # Writing the 'vars' file:
    # model_left_context=0
    # model_right_context=7
    # Use a context manager so the handle is closed even if a context key
    # is missing below (the original leaked the handle on KeyError).
    with open('{0}/vars'.format(config_dir), 'w') as vf:
        vf.write('model_left_context={0}\n'.format(info['left-context']))
        vf.write('model_right_context={0}\n'.format(info['right-context']))
def get_model_component_info(model_filename):
    """Read an existing model (*.raw or *.mdl) and return a list of
    XconfigExistingLayer objects, one per {input,output}-node or
    component-node, with the node's 'name' and its 'dim' ('output-dim'
    for component-nodes, 'dim' for input/output-nodes).

    e.g. layer in *.mdl -> corresponding 'XconfigExistingLayer' layer
    'input-node name=ivector dim=100' ->
    'existing name=ivector dim=100'
    'component-node name=tdnn1.affine ... input-dim=1000 '
    'output-dim=500' -> 'existing name=tdnn1.affine dim=500'
    """
    all_layers = []
    try:
        # Only verify the model file is readable; nnet3-info does the real
        # parsing.  Close the handle immediately (it was leaked before).
        f = open(model_filename, 'r')
        f.close()
    except Exception as e:
        # The original format string had three args but two placeholders,
        # silently dropping the exception text.
        sys.exit("{0}: error reading model file '{1}': {2}".format(
            sys.argv[0], model_filename, repr(e)))

    # use nnet3-info to get component names in the model.
    out = common_lib.get_command_stdout(
        """nnet3-info {0} | grep '\\-node' """.format(model_filename))

    # out contains all {output, input, component}-nodes used in
    # model_filename.  It can parse lines in out like:
    # i.e. input-node name=input dim=40
    #      component-node name=tdnn1.affine component=tdnn1.affine input=lda
    #      input-dim=300 output-dim=512
    layer_names = []
    for line in out.split("\n"):
        parts = line.split(" ")
        dim = -1
        # Reset per line; previously 'layer_name' could be unbound (or
        # stale from a previous line) on lines with no name= field.
        layer_name = None
        for field in parts:
            key_value = field.split("=")
            if len(key_value) == 2:
                key = key_value[0]
                value = key_value[1]
                if key == "name":             # name=**
                    layer_name = value
                elif key == "dim":            # for input-node
                    dim = int(value)
                elif key == "output-dim":     # for component-node
                    dim = int(value)

        if layer_name is not None and layer_name not in layer_names:
            layer_names.append(layer_name)
            assert dim != -1
            # Fresh dict per layer; a single shared dict would alias state
            # across the XconfigExistingLayer objects.
            key_to_value = {'name': layer_name, 'dim': dim}
            all_layers.append(xlayers.XconfigExistingLayer(
                'existing', key_to_value, all_layers))
    if len(all_layers) == 0:
        raise RuntimeError("{0}: model filename '{1}' is empty.".format(
            sys.argv[0], model_filename))
    return all_layers
def add_nnet_context_info(config_dir, nnet_edits=None, existing_model=None):
    """Create the 'vars' file that specifies model_left_context, etc."""
    common_lib.execute_command(
        "nnet3-init {0} {1}/ref.config "
        "{1}/ref.raw"
        "".format(existing_model if existing_model is not None else "",
                  config_dir))
    model = "{0}/ref.raw".format(config_dir)
    if nnet_edits is not None:
        # Apply edits on the fly through a pipe before inspecting the model.
        model = "nnet3-copy --edits='{0}' {1} - |".format(nnet_edits, model)
    out = common_lib.get_command_stdout('nnet3-info "{0}"'.format(model))
    # out looks like this
    # left-context: 7
    # right-context: 0
    # num-parameters: 90543902
    # modulus: 1
    # ...
    info = {}
    for line in out.split("\n")[:4]:  # take 4 initial lines,
        parts = line.split(":")
        if len(parts) != 2:
            continue
        info[parts[0].strip()] = int(parts[1].strip())

    # Writing the 'vars' file:
    # model_left_context=0
    # model_right_context=7
    # Context manager closes the handle even if a context key is missing
    # (the original leaked the handle on KeyError).
    with open("{0}/vars".format(config_dir), "w") as vf:
        vf.write("model_left_context={0}\n".format(info["left-context"]))
        vf.write("model_right_context={0}\n".format(info["right-context"]))
def get_input_model_info(input_model):
    """Return a dictionary with keys "model_left_context" and
    "model_right_context" and values equal to the left/right model
    contexts for input_model.

    This function is useful when using the --trainer.input-model option
    instead of initializing the model using configs.
    """
    variables = {}
    key_map = {'left-context': 'model_left_context',
               'right-context': 'model_right_context'}
    try:
        out = common_lib.get_command_stdout("""nnet3-info {0} | """
                                            """head -4 """.format(input_model))
        # out looks like this
        # left-context: 7
        # right-context: 0
        # num-parameters: 90543902
        # modulus: 1
        for line in out.split("\n"):
            parts = line.split(":")
            if len(parts) != 2:
                continue
            field = parts[0].strip()
            if field in key_map:
                variables[key_map[field]] = int(parts[1].strip())
    except ValueError:
        pass
    return variables
def add_nnet_context_info(config_dir, nnet_edits=None):
    """Create the 'vars' file that specifies model_left_context, etc."""
    common_lib.execute_command("nnet3-init {0}/ref.config "
                               "{0}/ref.raw".format(config_dir))
    model = "{0}/ref.raw".format(config_dir)
    if nnet_edits is not None:
        # Apply edits on the fly through a pipe before inspecting the model.
        model = "nnet3-copy --edits='{0}' {1} - |".format(nnet_edits, model)
    out = common_lib.get_command_stdout(
        'nnet3-info "{0}" | head -n 4 '.format(model))
    # out looks like this
    # left-context: 7
    # right-context: 0
    # num-parameters: 90543902
    # modulus: 1
    info = {}
    for line in out.split("\n"):
        parts = line.split(":")
        if len(parts) != 2:
            continue
        info[parts[0].strip()] = int(parts[1].strip())

    # Writing the 'vars' file:
    # model_left_context=0
    # model_right_context=7
    # Context manager closes the handle even if a context key is missing
    # (the original leaked the handle on KeyError).
    with open('{0}/vars'.format(config_dir), 'w') as vf:
        vf.write('model_left_context={0}\n'.format(info['left-context']))
        vf.write('model_right_context={0}\n'.format(info['right-context']))
def add_nnet_context_info(config_dir, nnet_edits=None, existing_model=None):
    """Create the 'vars' file that specifies model_left_context, etc."""
    common_lib.execute_command("nnet3-init {0} {1}/ref.config "
                               "{1}/ref.raw"
                               "".format(existing_model if
                                         existing_model is not None else "",
                                         config_dir))
    model = "{0}/ref.raw".format(config_dir)
    if nnet_edits is not None:
        # Apply edits on the fly through a pipe before inspecting the model.
        model = "nnet3-copy --edits='{0}' {1} - |".format(nnet_edits, model)
    out = common_lib.get_command_stdout('nnet3-info "{0}" | head -n 4 '
                                        .format(model))
    # out looks like this
    # left-context: 7
    # right-context: 0
    # num-parameters: 90543902
    # modulus: 1
    info = {}
    for line in out.split("\n"):
        parts = line.split(":")
        if len(parts) != 2:
            continue
        info[parts[0].strip()] = int(parts[1].strip())

    # Writing the 'vars' file:
    # model_left_context=0
    # model_right_context=7
    # Context manager closes the handle even if a context key is missing
    # (the original leaked the handle on KeyError).
    with open('{0}/vars'.format(config_dir), 'w') as vf:
        vf.write('model_left_context={0}\n'.format(info['left-context']))
        vf.write('model_right_context={0}\n'.format(info['right-context']))
def get_model_component_info(model_filename):
    """Read an existing model (*.raw or *.mdl) and return a list of
    XconfigExistingLayer objects, one per {input,output}-node or
    component-node, with the node's 'name' and its 'dim' ('output-dim'
    for component-nodes, 'dim' for input/output-nodes).

    e.g. layer in *.mdl -> corresponding 'XconfigExistingLayer' layer
    'input-node name=ivector dim=100' ->
    'existing name=ivector dim=100'
    'component-node name=tdnn1.affine ... input-dim=1000 '
    'output-dim=500' -> 'existing name=tdnn1.affine dim=500'
    """
    all_layers = []
    try:
        # Only verify the model file is readable; nnet3-info does the real
        # parsing.  Close the handle immediately (it was leaked before).
        f = open(model_filename, 'r')
        f.close()
    except Exception as e:
        # The original format string had three args but two placeholders,
        # silently dropping the exception text.
        sys.exit("{0}: error reading model file '{1}': {2}".format(
            sys.argv[0], model_filename, repr(e)))

    # use nnet3-info to get component names in the model.
    out = common_lib.get_command_stdout(
        """nnet3-info {0} | grep '\\-node' """.format(model_filename))

    # out contains all {output, input, component}-nodes used in
    # model_filename.  It can parse lines in out like:
    # i.e. input-node name=input dim=40
    #      component-node name=tdnn1.affine component=tdnn1.affine input=lda
    #      input-dim=300 output-dim=512
    layer_names = []
    for line in out.split("\n"):
        parts = line.split(" ")
        dim = -1
        # Reset per line; previously 'layer_name' could be unbound (or
        # stale from a previous line) on lines with no name= field.
        layer_name = None
        for field in parts:
            key_value = field.split("=")
            if len(key_value) == 2:
                key = key_value[0]
                value = key_value[1]
                if key == "name":             # name=**
                    layer_name = value
                elif key == "dim":            # for input-node
                    dim = int(value)
                elif key == "output-dim":     # for component-node
                    dim = int(value)

        if layer_name is not None and layer_name not in layer_names:
            layer_names.append(layer_name)
            assert dim != -1
            # Fresh dict per layer; a single shared dict would alias state
            # across the XconfigExistingLayer objects.
            key_to_value = {'name': layer_name, 'dim': dim}
            all_layers.append(xlayers.XconfigExistingLayer(
                'existing', key_to_value, all_layers))
    if len(all_layers) == 0:
        raise RuntimeError("{0}: model filename '{1}' is empty.".format(
            sys.argv[0], model_filename))
    return all_layers
def get_input_model_info(input_model):
    """Return a dictionary with keys "model_left_context" and
    "model_right_context" and values equal to the left/right model
    contexts for input_model.

    This function is useful when using the --trainer.input-model option
    instead of initializing the model using configs.
    """
    variables = {}
    try:
        out = common_lib.get_command_stdout("""nnet3-info {0} | """
                                            """head -4 """.format(input_model))
        # out looks like this
        # left-context: 7
        # right-context: 0
        # num-parameters: 90543902
        # modulus: 1
        for line in out.split("\n"):
            parts = line.split(":")
            if len(parts) != 2:
                continue
            if parts[0].strip() == 'left-context':
                variables['model_left_context'] = int(parts[1].strip())
            elif parts[0].strip() == 'right-context':
                variables['model_right_context'] = int(parts[1].strip())
    except ValueError:
        # Best-effort: if a context value fails to parse as an int, return
        # whatever was collected so far rather than failing hard.
        pass

    return variables
def check_model_contexts(config_dir, nnet_edits=None, existing_model=None):
    """Verify that init.config does not require more left/right context
    than ref.config; raise an Exception if it does.

    Compiles each of {config_dir}/{init,ref}.config (when present) and
    compares the left-context/right-context reported by nnet3-info.
    """
    contexts = {}
    for file_name in ["init", "ref"]:
        if os.path.exists("{0}/{1}.config".format(config_dir, file_name)):
            contexts[file_name] = {}
            common_lib.execute_command(
                "nnet3-init {0} {1}/{2}.config "
                "{1}/{2}.raw"
                "".format(
                    existing_model if existing_model is not None else "",
                    config_dir,
                    file_name,
                ))
            model = "{0}/{1}.raw".format(config_dir, file_name)
            # Edits are applied only to the 'ref' model, not 'init'.
            if nnet_edits is not None and file_name != "init":
                model = "nnet3-copy --edits='{0}' {1} - |".format(
                    nnet_edits, model)
            out = common_lib.get_command_stdout(
                'nnet3-info "{0}"'.format(model))
            # out looks like this
            # left-context: 7
            # right-context: 0
            # num-parameters: 90543902
            # modulus: 1
            # ...
            for line in out.split("\n")[:4]:  # take 4 initial lines,
                parts = line.split(":")
                if len(parts) != 2:
                    continue
                key = parts[0].strip()
                value = int(parts[1].strip())
                if key in ["left-context", "right-context"]:
                    contexts[file_name][key] = value

    if "init" in contexts:
        assert "ref" in contexts
        if "left-context" in contexts["init"] and "left-context" in contexts[
                "ref"]:
            if (contexts["init"]["left-context"]
                    > contexts["ref"]["left-context"]) or (
                        contexts["init"]["right-context"]
                        > contexts["ref"]["right-context"]):
                raise Exception(
                    "Model specified in {0}/init.config requires greater"
                    " context than the model specified in {0}/ref.config."
                    " This might be due to use of label-delay at the output"
                    " in ref.config. Please use delay=$label_delay in the"
                    " initial fixed-affine-layer of the network, to avoid"
                    " this issue.")
def check_model_contexts(config_dir, nnet_edits=None, existing_model=None):
    """Verify that init.config does not require more left/right context
    than ref.config; raise an Exception if it does.

    Compiles each of {config_dir}/{init,ref}.config (when present) and
    compares the left-context/right-context reported by nnet3-info.
    """
    contexts = {}
    for file_name in ['init', 'ref']:
        if os.path.exists('{0}/{1}.config'.format(config_dir, file_name)):
            contexts[file_name] = {}
            common_lib.execute_command("nnet3-init {0} {1}/{2}.config "
                                       "{1}/{2}.raw"
                                       "".format(existing_model if
                                                 existing_model is not
                                                 None else '',
                                                 config_dir, file_name))
            model = "{0}/{1}.raw".format(config_dir, file_name)
            # Edits are applied to both 'init' and 'ref' in this variant.
            if nnet_edits is not None:
                model = "nnet3-copy --edits='{0}' {1} - |".format(nnet_edits,
                                                                  model)
            out = common_lib.get_command_stdout('nnet3-info "{0}" | head -n 4 '
                                                .format(model))
            # out looks like this
            # left-context: 7
            # right-context: 0
            # num-parameters: 90543902
            # modulus: 1
            for line in out.split("\n"):
                parts = line.split(":")
                if len(parts) != 2:
                    continue
                key = parts[0].strip()
                value = int(parts[1].strip())
                if key in ['left-context', 'right-context']:
                    contexts[file_name][key] = value

    if 'init' in contexts:
        assert('ref' in contexts)
        if ('left-context' in contexts['init']
                and 'left-context' in contexts['ref']):
            if ((contexts['init']['left-context']
                 > contexts['ref']['left-context'])
                    or (contexts['init']['right-context']
                        > contexts['ref']['right-context'])):
                raise Exception(
                    "Model specified in {0}/init.config requires greater"
                    " context than the model specified in {0}/ref.config."
                    " This might be due to use of label-delay at the output"
                    " in ref.config. Please use delay=$label_delay in the"
                    " initial fixed-affine-layer of the network, to avoid"
                    " this issue.")
def check_model_contexts(config_dir, nnet_edits=None, existing_model=None):
    """Verify that init.config does not require more left/right context
    than ref.config; raise an Exception if it does.

    Compiles each of {config_dir}/{init,ref}.config (when present) and
    compares the left-context/right-context reported by nnet3-info.
    """
    contexts = {}
    for file_name in ['init', 'ref']:
        if os.path.exists('{0}/{1}.config'.format(config_dir, file_name)):
            contexts[file_name] = {}
            common_lib.execute_command("nnet3-init {0} {1}/{2}.config "
                                       "{1}/{2}.raw"
                                       "".format(existing_model if
                                                 existing_model is not
                                                 None else '',
                                                 config_dir, file_name))
            model = "{0}/{1}.raw".format(config_dir, file_name)
            if nnet_edits is not None:
                model = "nnet3-copy --edits='{0}' {1} - |".format(nnet_edits,
                                                                  model)
            out = common_lib.get_command_stdout('nnet3-info "{0}" | head -n 4 '
                                                .format(model))
            # out looks like this
            # left-context: 7
            # right-context: 0
            # num-parameters: 90543902
            # modulus: 1
            for line in out.split("\n"):
                parts = line.split(":")
                if len(parts) != 2:
                    continue
                key = parts[0].strip()
                value = int(parts[1].strip())
                if key in ['left-context', 'right-context']:
                    contexts[file_name][key] = value

    # dict.has_key() was removed in Python 3; use the 'in' operator.
    if 'init' in contexts:
        assert 'ref' in contexts
        if ('left-context' in contexts['init']
                and 'left-context' in contexts['ref']):
            if ((contexts['init']['left-context']
                 > contexts['ref']['left-context'])
                    or (contexts['init']['right-context']
                        > contexts['ref']['right-context'])):
                raise Exception(
                    "Model specified in {0}/init.config requires greater"
                    " context than the model specified in {0}/ref.config."
                    " This might be due to use of label-delay at the output"
                    " in ref.config. Please use delay=$label_delay in the"
                    " initial fixed-affine-layer of the network, to avoid"
                    " this issue.")
def add_nnet_context_info(config_dir, nnet_edits=None, existing_model=None):
    """Create the 'vars' file that specifies model_left_context, etc.

    Also dumps the full nnet3-info output to {config_dir}/ref.raw.info for
    later inspection.
    """
    common_lib.execute_command(
        "nnet3-init {0} {1}/ref.config "
        "{1}/ref.raw"
        "".format(existing_model if existing_model is not None else "",
                  config_dir))
    model = "{0}/ref.raw".format(config_dir)
    if nnet_edits is not None:
        # Apply edits on the fly through a pipe before inspecting the model.
        model = "nnet3-copy --edits='{0}' {1} - |".format(nnet_edits, model)
    out = common_lib.get_command_stdout('nnet3-info "{0}"'.format(model))

    # Add by myself: keep a copy of the full nnet3-info output.
    # Context managers guarantee both files are closed even on error
    # (the original leaked the handles on any exception).
    with open("{0}/ref.raw.info".format(config_dir), 'w') as f_info:
        print('# This file was created by the command:\n'
              '# nnet3-info "{0}"\n'
              '# which is called in func:\n'
              '# add_nnet_context_info(args.config_dir, args.nnet_edits,'
              '# existing_model=args.existing_model)\n'
              '# This func is in steps/nnet3/xconfig_to_configs.py'.format(model),
              file=f_info)
        print(out, file=f_info)

    # out looks like this
    # left-context: 7
    # right-context: 0
    # num-parameters: 90543902
    # modulus: 1
    # ...
    info = {}
    for line in out.split("\n")[:4]:  # take 4 initial lines,
        parts = line.split(":")
        if len(parts) != 2:
            continue
        info[parts[0].strip()] = int(parts[1].strip())

    # Writing the 'vars' file:
    # model_left_context=0
    # model_right_context=7
    with open('{0}/vars'.format(config_dir), 'w') as vf:
        vf.write('model_left_context={0}\n'.format(info['left-context']))
        vf.write('model_right_context={0}\n'.format(info['right-context']))
def parse_train_logs(exp_dir):
    """Parse train.*.log accounting lines.

    Returns:
        A dict mapping iteration number -> the longest per-thread
        wall-clock time (in seconds) among that iteration's training jobs.
    """
    train_log_files = "%s/log/train.*.log" % (exp_dir)
    train_log_lines = common_lib.get_command_stdout(
        'grep -e Accounting {0}'.format(train_log_files))
    # Raw strings: '\.' in a plain literal is an invalid escape sequence.
    parse_regex = re.compile(r".*train\.([0-9]+)\.([0-9]+)\.log:# "
                             r"Accounting: time=([0-9]+) thread.*")

    train_times = {}
    for line in train_log_lines.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            # groups = (iteration, job-index, seconds)
            groups = mat_obj.groups()
            # setdefault replaces the previous try/except-KeyError idiom.
            train_times.setdefault(int(groups[0]), {})[int(groups[1])] = \
                float(groups[2])

    # Keep only the slowest job per iteration.
    for iter in train_times.keys():
        train_times[iter] = max(train_times[iter].values())
    return train_times
def get_train_times(exp_dir):
    """Collect per-iteration training times from train.*.log files.

    Returns:
        A dict mapping iteration number -> the longest per-thread
        wall-clock time (in seconds) among that iteration's training jobs.
    """
    train_log_files = "%s/log/" % (exp_dir)
    train_log_names = "train.*.log"
    train_log_lines = common_lib.get_command_stdout(
        'find {0} -name "{1}" | xargs grep -H -e Accounting'.format(
            train_log_files, train_log_names))
    # Raw strings: '\.' in a plain literal is an invalid escape sequence.
    parse_regex = re.compile(r".*train\.([0-9]+)\.([0-9]+)\.log:# "
                             r"Accounting: time=([0-9]+) thread.*")

    train_times = {}
    for line in train_log_lines.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            # groups = (iteration, job-index, seconds)
            groups = mat_obj.groups()
            # setdefault replaces the previous try/except-KeyError idiom.
            train_times.setdefault(int(groups[0]), {})[int(groups[1])] = \
                float(groups[2])

    # Keep only the slowest job per iteration.
    for iter in train_times.keys():
        train_times[iter] = max(train_times[iter].values())
    return train_times
def train(args, run_opts):
    """The main function for training.

    Args:
        args: a Namespace object with the required parameters
            obtained from the function process_args()
        run_opts: RunOpts object obtained from the process_args()
    """

    arg_string = pprint.pformat(vars(args))
    logger.info("Arguments for the experiment\n{0}".format(arg_string))

    # Set some variables.
    config_dir = '{0}/configs'.format(args.dir)
    am_var_file = '{0}/vars_am'.format(config_dir)
    xvec_var_file = '{0}/vars_xvec'.format(config_dir)
    am_variables = common_train_lib.parse_generic_config_vars_file(am_var_file)
    xvec_variables = common_train_lib.parse_generic_config_vars_file(
        xvec_var_file)

    # Both networks must declare their left/right contexts.
    try:
        am_model_left_context = am_variables['model_left_context']
        am_model_right_context = am_variables['model_right_context']
        xvec_model_left_context = xvec_variables['model_left_context']
        xvec_model_right_context = xvec_variables['model_right_context']
    except KeyError as e:
        raise Exception("KeyError {0}: Variables need to be defined in "
                        "{1}".format(str(e), '{0}/configs'.format(args.dir)))

    am_left_context = am_model_left_context
    am_right_context = am_model_right_context
    xvec_left_context = xvec_model_left_context
    xvec_right_context = xvec_model_right_context

    # Initialize as "raw" nnet, prior to training the LDA-like preconditioning
    # matrix.  This first config just does any initial splicing that we do;
    # we do this as it's a convenient way to get the stats for the 'lda-like'
    # transform.
    if (args.stage <= -5) and os.path.exists(args.dir+"/configs/init.config"):
        logger.info("Initializing a basic network for estimating "
                    "preconditioning matrix")
        common_lib.execute_command(
            """{command} {dir}/log/nnet_init.log \
                    nnet3-init --srand=-2 {dir}/configs/init.config \
                    {dir}/init.raw""".format(command=run_opts.command,
                                             dir=args.dir))

    am_egs_dir = args.am_egs_dir
    xvec_egs_dir = args.xvec_egs_dir
    am_output_name = args.am_output_name
    xvec_output_name = args.xvec_output_name
    am_weight = args.am_weight
    xvec_weight = args.xvec_weight

    feat_dim = int(common_lib.get_command_stdout(
        "cat {0}/info/feat_dim".format(am_egs_dir)))
    num_archives = int(common_lib.get_command_stdout(
        "cat {0}/info/num_archives".format(am_egs_dir)))
    tmp_feat_dim = int(common_lib.get_command_stdout(
        "cat {0}/info/feat_dim".format(xvec_egs_dir)))
    tmp_num_archives = int(common_lib.get_command_stdout(
        "cat {0}/info/num_archives".format(xvec_egs_dir)))

    # frames_per_eg is no longer a parameter but load from
    # am_egs/info/frames_per_eg
    am_frames_per_eg = int(common_lib.get_command_stdout(
        "cat {0}/info/frames_per_eg".format(am_egs_dir)))

    if (feat_dim != tmp_feat_dim
            or num_archives * am_frames_per_eg != tmp_num_archives):
        raise Exception('The am egs and xvec egs do not match')

    if args.num_jobs_final > num_archives:
        raise Exception('num_jobs_final cannot exceed the number of archives '
                        'in the egs directory')

    # # No need to copy files for decoding
    # common_train_lib.copy_egs_properties_to_exp_dir(am_egs_dir, args.dir)

    if args.stage <= -3 and os.path.exists(args.dir+"/configs/init.config"):
        logger.info('Computing the preconditioning matrix for input features')

        # BUGFIX: this call previously referenced the undefined name
        # 'egs_dir' (NameError); the LDA stats come from the AM egs.
        train_lib.common.compute_preconditioning_matrix(
            args.dir, am_egs_dir, num_archives, run_opts,
            max_lda_jobs=args.max_lda_jobs,
            rand_prune=args.rand_prune)

    if args.stage <= -1:
        logger.info("Preparing the initial network.")
        common_train_lib.prepare_initial_network(args.dir, run_opts)

    # set num_iters so that as close as possible, we process the data
    # $num_epochs times, i.e. $num_iters*$avg_num_jobs) ==
    # $num_epochs*$num_archives, where
    # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2.
    num_archives_expanded = num_archives * am_frames_per_eg
    num_archives_to_process = int(args.num_epochs * num_archives_expanded)
    num_archives_processed = 0
    # BUGFIX: use floor division; '/' yields a float in Python 3 and
    # range(num_iters) below would raise TypeError.
    num_iters = ((num_archives_to_process * 2)
                 // (args.num_jobs_initial + args.num_jobs_final))

    # If do_final_combination is True, compute the set of models_to_combine.
    # Otherwise, models_to_combine will be none.
    if args.do_final_combination:
        models_to_combine = common_train_lib.get_model_combine_iters(
            num_iters, args.num_epochs,
            num_archives_expanded, args.max_models_combine,
            args.num_jobs_final)
    else:
        models_to_combine = None

    logger.info("Training will run for {0} epochs = "
                "{1} iterations".format(args.num_epochs, num_iters))

    for iter in range(num_iters):
        if (args.exit_stage is not None) and (iter == args.exit_stage):
            logger.info("Exiting early due to --exit-stage {0}".format(iter))
            return
        # Linearly ramp the number of parallel jobs from num_jobs_initial
        # to num_jobs_final over the course of training.
        current_num_jobs = int(0.5 + args.num_jobs_initial
                               + (args.num_jobs_final - args.num_jobs_initial)
                               * float(iter) / num_iters)

        if args.stage <= iter:
            lrate = common_train_lib.get_learning_rate(
                iter, current_num_jobs, num_iters, num_archives_processed,
                num_archives_to_process, args.initial_effective_lrate,
                args.final_effective_lrate)
            shrinkage_value = 1.0 - (args.proportional_shrink * lrate)
            if shrinkage_value <= 0.5:
                raise Exception(
                    "proportional-shrink={0} is too large, it gives "
                    "shrink-value={1}".format(args.proportional_shrink,
                                              shrinkage_value))

            percent = num_archives_processed * 100.0 / num_archives_to_process
            epoch = (num_archives_processed * args.num_epochs
                     / num_archives_to_process)
            shrink_info_str = ''
            if shrinkage_value != 1.0:
                shrink_info_str = 'shrink: {0:0.5f}'.format(shrinkage_value)
            logger.info("Iter: {0}/{1} "
                        "Epoch: {2:0.2f}/{3:0.1f} ({4:0.1f}% complete) "
                        "lr: {5:0.6f} {6}".format(iter, num_iters - 1,
                                                  epoch, args.num_epochs,
                                                  percent, lrate,
                                                  shrink_info_str))

            train_lib.common.train_cvector_one_iteration(
                dir=args.dir,
                iter=iter,
                srand=args.srand,
                am_output_name=am_output_name,
                am_weight=am_weight,
                am_egs_dir=am_egs_dir,
                xvec_output_name=xvec_output_name,
                xvec_weight=xvec_weight,
                xvec_egs_dir=xvec_egs_dir,
                num_jobs=current_num_jobs,
                num_archives_processed=num_archives_processed,
                num_archives=num_archives,
                learning_rate=lrate,
                minibatch_size_str=args.minibatch_size,
                momentum=args.momentum,
                max_param_change=args.max_param_change,
                shuffle_buffer_size=args.shuffle_buffer_size,
                run_opts=run_opts,
                am_frames_per_eg=am_frames_per_eg,
                dropout_edit_string=common_train_lib.get_dropout_edit_string(
                    args.dropout_schedule,
                    float(num_archives_processed) / num_archives_to_process,
                    iter),
                shrinkage_value=shrinkage_value,
                get_raw_nnet_from_am=False,
                backstitch_training_scale=args.backstitch_training_scale,
                backstitch_training_interval=args.backstitch_training_interval)

            if args.cleanup:
                # do a clean up everything but the last 2 models, under
                # certain conditions
                common_train_lib.remove_model(
                    args.dir, iter-2, num_iters, models_to_combine,
                    args.preserve_model_interval,
                    get_raw_nnet_from_am=False)

            if args.email is not None:
                reporting_iter_interval = num_iters * args.reporting_interval
                if iter % reporting_iter_interval == 0:
                    # lets do some reporting
                    [report, times, data] = (
                        nnet3_log_parse.generate_acc_logprob_report(args.dir))
                    message = report
                    subject = ("Update : Expt {dir} : "
                               "Iter {iter}".format(dir=args.dir, iter=iter))
                    common_lib.send_mail(message, subject, args.email)

        num_archives_processed = num_archives_processed + current_num_jobs

    # when we do final combination, just use the xvector egs
    if args.stage <= num_iters:
        if args.do_final_combination:
            logger.info("Doing final combination to produce final.mdl")
            train_lib.common.combine_models(
                dir=args.dir, num_iters=num_iters,
                models_to_combine=models_to_combine,
                egs_dir=xvec_egs_dir,
                minibatch_size_str="64", run_opts=run_opts,
                get_raw_nnet_from_am=False,
                max_objective_evaluations=args.max_objective_evaluations,
                use_egs=True)
            # sum_to_one_penalty=args.combine_sum_to_one_penalty,
        else:
            common_lib.force_symlink("{0}.raw".format(num_iters),
                                     "{0}/final.raw".format(args.dir))

    if args.cleanup:
        logger.info("Cleaning up the experiment directory "
                    "{0}".format(args.dir))
        remove_egs = False
        common_train_lib.clean_nnet_dir(
            nnet_dir=args.dir, num_iters=num_iters,
            egs_dir=am_egs_dir,
            preserve_model_interval=args.preserve_model_interval,
            remove_egs=remove_egs,
            get_raw_nnet_from_am=False)

    # TODO: we may trace other output nodes expect for "output"
    # do some reporting
    outputs_list = common_train_lib.get_outputs_list("{0}/final.raw".format(
        args.dir), get_raw_nnet_from_am=False)
    if 'output' in outputs_list:
        [report, times, data] = nnet3_log_parse.generate_acc_logprob_report(
            args.dir)
        if args.email is not None:
            common_lib.send_mail(report, "Update : Expt {0} : "
                                         "complete".format(args.dir),
                                 args.email)
        with open("{dir}/accuracy.{output_name}.report".format(
                dir=args.dir, output_name="output"), "w") as f:
            f.write(report)

    common_lib.execute_command("subtools/kaldi/steps/info/nnet3_dir_info.pl "
                               "{0}".format(args.dir))
def parse_progress_logs_for_clipped_proportion(exp_dir):
    """Parse progress logs for clipped-proportion stats.

    e.g. for a line that is parsed from progress.*.log:
    exp/chain/cwrnn_trial2_ld5_sp/log/progress.245.log:component
    name=BLstm1_forward_c type=ClipGradientComponent, dim=512,
    norm-based-clipping=true, clipping-threshold=30,
    clipped-proportion=0.000565527,
    self-repair-clipped-proportion-threshold=0.01, self-repair-target=0,
    self-repair-scale=1

    Returns a dict with keys:
        'table': rows [iteration, cp_for_each_component...] (header first),
            with None where a component is missing at an iteration
        'cp_per_component_per_iter': {iter: {component_name: cp}}
        'cp_per_iter_per_component': {component_name: [[iter, cp], ...]}

    Raises MalformedClippedProportionLineException on any non-empty line
    that does not match, or a clipped-proportion value > 1.
    """
    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    # require_zero_status=False: grep exits non-zero when nothing matches,
    # which is not an error here.
    progress_log_lines = common_lib.get_command_stdout(
        'grep -e "{0}" {1}'.format(
            "clipped-proportion", progress_log_files),
        require_zero_status=False)
    # raw strings: '\.', '\-' are invalid escapes in ordinary strings
    parse_regex = re.compile(r".*progress\.([0-9]+)\.log:component "
                             r"name=(.*) type=.* "
                             r"clipped-proportion=([0-9\.e\-]+)")

    cp_per_component_per_iter = {}
    max_iteration = 0
    component_names = set([])
    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            if line.strip() == "":
                continue
            raise MalformedClippedProportionLineException(line)
        groups = mat_obj.groups()
        iteration = int(groups[0])
        max_iteration = max(max_iteration, iteration)
        name = groups[1]
        clipped_proportion = float(groups[2])
        # a proportion can never exceed 1; treat that as a malformed line
        if clipped_proportion > 1:
            raise MalformedClippedProportionLineException(line)
        if iteration not in cp_per_component_per_iter:
            cp_per_component_per_iter[iteration] = {}
        cp_per_component_per_iter[iteration][name] = clipped_proportion
        component_names.add(name)
    component_names = list(component_names)
    component_names.sort()

    # re-arranging the data into an array
    # and into cp_per_iter_per_component
    cp_per_iter_per_component = {}
    for component_name in component_names:
        cp_per_iter_per_component[component_name] = []
    data = []
    data.append(["iteration"]+component_names)
    for iter in range(max_iteration+1):
        if iter not in cp_per_component_per_iter:
            continue
        comp_dict = cp_per_component_per_iter[iter]
        row = [iter]
        for component in component_names:
            try:
                row.append(comp_dict[component])
                cp_per_iter_per_component[component].append(
                    [iter, comp_dict[component]])
            except KeyError:
                # if clipped proportion is not available for a particular
                # component it is set to None
                # this usually happens during layer-wise discriminative
                # training
                row.append(None)
        data.append(row)

    return {'table': data,
            'cp_per_component_per_iter': cp_per_component_per_iter,
            'cp_per_iter_per_component': cp_per_iter_per_component}
def parse_progress_logs_for_param_diff(exp_dir, pattern):
    """Parse progress logs for per-component parameter differences.

    e.g. for a line that is parsed from progress.*.log:
    exp/chain/cwrnn_trial2_ld5_sp/log/progress.245.log:LOG
    (nnet3-show-progress:main():nnet3-show-progress.cc:144) Relative
    parameter differences per layer are [ Cwrnn1_T3_W_r:0.0171537
    Cwrnn1_T3_W_x:1.33338e-07 ... Cwrnn3_T1_W_x:0.0271934 ]

    pattern must be either "Relative parameter differences" or
    "Parameter differences".

    Returns a dict with keys:
        'progress_per_component': {component_name: {iter: diff}}
        'component_names': sorted list of component names seen
        'max_iter': largest iteration number found in the logs
    """
    if pattern not in set(["Relative parameter differences",
                           "Parameter differences"]):
        raise Exception("Unknown value for pattern : {0}".format(pattern))

    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    progress_per_iter = {}
    component_names = set([])
    progress_log_lines = common_lib.get_command_stdout(
        'grep -e "{0}" {1}'.format(pattern, progress_log_files))
    # raw strings: '\.', '\[' are invalid escapes in ordinary strings
    parse_regex = re.compile(r".*progress\.([0-9]+)\.log:"
                             r"LOG.*{0}.*\[(.*)\]".format(pattern))
    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            continue
        groups = mat_obj.groups()
        iteration = groups[0]
        differences = parse_difference_string(groups[1])
        component_names = component_names.union(differences.keys())
        progress_per_iter[int(iteration)] = differences

    # ROBUSTNESS: without this guard, max() below raises a bare
    # ValueError (and the warning check a ZeroDivisionError) when no
    # log line matched the pattern.
    if not progress_per_iter:
        raise KaldiLogParseException("Could not find any lines with pattern "
                                     "{p} in {l}".format(
                                         p=pattern, l=progress_log_files))

    component_names = list(component_names)
    component_names.sort()

    # rearranging the parameter differences available per iter
    # into parameter differences per component
    progress_per_component = {}
    for cn in component_names:
        progress_per_component[cn] = {}

    max_iter = max(progress_per_iter.keys())
    total_missing_iterations = 0
    gave_user_warning = False
    for iter in range(max_iter + 1):
        try:
            component_dict = progress_per_iter[iter]
        except KeyError:
            continue

        for component_name in component_names:
            try:
                progress_per_component[component_name][iter] = component_dict[
                    component_name]
            except KeyError:
                total_missing_iterations += 1
                # the component was not found this iteration, may be because
                # of layerwise discriminative training
                pass

        if (total_missing_iterations/len(component_names) > 20
                and not gave_user_warning and logger is not None):
            logger.warning("There are more than {0} missing iterations per "
                           "component. Something might be wrong.".format(
                               total_missing_iterations/len(component_names)))
            gave_user_warning = True

    return {'progress_per_component': progress_per_component,
            'component_names': component_names,
            'max_iter': max_iter}
def parse_rnnlm_prob_logs(exp_dir, key='objf'):
    """Parse RNNLM train/compute_prob logs for train and valid objectives.

    Returns a list of (iteration, train_objf, valid_objf) tuples for every
    iteration that appears in both the training and the validation logs,
    sorted by iteration.

    Raises KaldiLogParseException when either log set yields no match for
    `key`, or when the two sets share no common iteration.
    """
    train_prob_files = "%s/log/train.*.*.log" % (exp_dir)
    valid_prob_files = "%s/log/compute_prob.*.log" % (exp_dir)
    train_prob_strings = common_lib.get_command_stdout(
        'grep -e {0} {1}'.format(key, train_prob_files))
    valid_prob_strings = common_lib.get_command_stdout(
        'grep -e {0} {1}'.format(key, valid_prob_files))

    # LOG
    # (rnnlm-train[5.3.36~8-2ec51]:PrintStatsOverall():rnnlm-core-training.cc:118)
    # Overall objf is (-4.426 + -0.008287) = -4.435 over 4.503e+06 words
    # (weighted) in 1117 minibatches; exact = (-4.426 + 0) = -4.426

    # LOG
    # (rnnlm-compute-prob[5.3.36~8-2ec51]:PrintStatsOverall():rnnlm-core-training.cc:118)
    # Overall objf is (-4.677 + -0.002067) = -4.679 over 1.08e+05 words
    # (weighted) in 27 minibatches; exact = (-4.677 + 0.002667) = -4.674

    # raw strings: '\.', '\(' etc. are invalid escapes in ordinary strings
    parse_regex_train = re.compile(
        r".*train\.([0-9]+).1.log:LOG "
        r".rnnlm-train.*:PrintStatsOverall..:"
        r"rnnlm.*training.cc:[0-9]+. Overall ([a-zA-Z\-]+) is "
        r".*exact = \(.+\) = ([0-9.\-\+e]+)")
    parse_regex_valid = re.compile(
        r".*compute_prob\.([0-9]+).log:LOG "
        r".rnnlm.*compute-prob.*:PrintStatsOverall..:"
        r"rnnlm.*training.cc:[0-9]+. Overall ([a-zA-Z\-]+) is "
        r".*exact = \(.+\) = ([0-9.\-\+e]+)")

    train_objf = {}
    valid_objf = {}

    for line in train_prob_strings.split('\n'):
        mat_obj = parse_regex_train.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            if groups[1] == key:
                train_objf[int(groups[0])] = groups[2]
    if not train_objf:
        raise KaldiLogParseException("Could not find any lines with {k} in "
                                     " {l}".format(k=key, l=train_prob_files))

    for line in valid_prob_strings.split('\n'):
        mat_obj = parse_regex_valid.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            if groups[1] == key:
                valid_objf[int(groups[0])] = groups[2]
    if not valid_objf:
        raise KaldiLogParseException("Could not find any lines with {k} in "
                                     " {l}".format(k=key, l=valid_prob_files))

    iters = list(set(valid_objf.keys()).intersection(train_objf.keys()))
    if not iters:
        # BUGFIX: message previously read "Could not any common iterations"
        raise KaldiLogParseException("Could not find any common iterations "
                                     "with key {k} in both {tl} and "
                                     "{vl}".format(k=key, tl=train_prob_files,
                                                   vl=valid_prob_files))
    iters.sort()
    # BUGFIX: wrap in list() -- under python3 a bare map() is a one-shot
    # iterator, so callers could not index it or iterate twice.  This also
    # matches the sibling parse_prob_logs(), which returns a list.
    return list(
        map(lambda x: (int(x), float(train_objf[x]), float(valid_objf[x])),
            iters))