def main(model_folder, example_folder):
    """Predict sentence boundaries from audio features for one example folder.

    Loads the SBD configuration found in *model_folder*, parses the example's
    ctm/pitch/energy feature files, and prints the classifier's prediction.
    """
    # Resolve model artifacts and install the SBD configuration globally.
    config_file, caffemodel_file, net_proto = get_filenames(model_folder)
    sbd.SbdConfig(config_file)

    # The example folder is expected to contain the three audio feature files.
    ctm_file, pitch_file, energy_file = get_audio_files(example_folder)

    # Parse ctm_file, pitch_file and energy_file into the parser's state.
    audio_parser = AudioParser(ctm_file, pitch_file, energy_file)
    audio_parser.parse()

    # Run the audio classifier over the parsed features and show the result.
    clf = load_audio_classifier(model_folder)
    prediction = clf.predict_audio(audio_parser)
    print(prediction)
def classifyAudioLexical():
    """Flask endpoint: classify one example with both the audio and the
    lexical model, fuse the two probability streams, and return JSON.

    Side effects: writes the raw audio probabilities to
    ``<example>.result`` next to the example folder.
    """
    # NOTE(review): `assert` is stripped under `python -O`; presumably the
    # route decorator already restricts this to POST — confirm upstream.
    assert request.method == 'POST'

    # Locate the requested example and its three audio feature files.
    example_folder = os.path.join(route_folder, AUDIO_EXAMPLE_FOLDER, request.form['example'])
    ctm_file, pitch_file, energy_file = get_audio_files(example_folder)

    # Parse ctm_file, pitch_file and energy_file.
    parser = AudioParser()
    talks = parser.parse(ctm_file)

    # Audio prediction.
    load_config(AUDIO_MODEL_FOLDER, request.form['audio_folder'])
    audio_probs = audio_classifier.predict(InputAudio(talks))

    # Lexical prediction.
    load_config(LEXICAL_MODEL_FOLDER, request.form['lexical_folder'])
    input_text = InputText(talks)
    lexical_probs = lexical_classifier.predict(input_text)

    # Fetch the window/punctuation parameters each classifier was trained with.
    (lexical_window_size, lexical_punctuation_pos, pos_tagging) = lexical_classifier.get_lexical_parameter()
    (audio_window_size, audio_punctuation_pos) = audio_classifier.get_audio_parameter()

    # Persist the per-token audio probabilities alongside the example.
    audio_classes = ["NONE", "PERIOD"]
    all_audio_probs = convert_probabilities(len(input_text.tokens), audio_punctuation_pos, audio_probs, audio_classes)
    file_name = os.path.join(route_folder, AUDIO_EXAMPLE_FOLDER, request.form['example'] + ".result")
    writer = ResultWriter(audio_classes)
    writer.writeToFile(file_name, input_text.tokens, all_audio_probs)

    # Fuse the lexical and audio probability streams.
    fusion = ThresholdFusion()
    fusion.init_parameters(lexical_punctuation_pos, lexical_window_size, audio_punctuation_pos, audio_window_size)
    fusion_probs = fusion.fuse(len(input_text.tokens), lexical_probs, audio_probs)

    # Serialize tokens plus all three probability streams for the client.
    converter = JsonConverter(lexical_punctuation_pos, lexical_window_size, audio_punctuation_pos, audio_window_size, pos_tagging)
    data = converter.convert_fusion(input_text.tokens, fusion_probs, lexical_probs, audio_probs)
    return json.dumps(data)
# Command-line interface: data file, word vectors, and the two model roots.
parser.add_argument('ctm_file', help="path to ctm_file", default="evaluation_data/data/tst2011_0.ctm", nargs='?')
parser.add_argument('vectorfile', help='the google news word vector', default='evaluation_data/GoogleNews-vectors-negative300.bin', nargs='?')
parser.add_argument('lexical_model_folder', help="path to lexical models", default="evaluation_data/lexical_models", nargs='?')
parser.add_argument('audio_model_folder', help="path to audio models", default="evaluation_data/audio_models", nargs='?')
parser.add_argument('--release', help="whether to test in release mode", action='store_true')
args = parser.parse_args()

# The word-vector file is large; only load it for release-mode evaluation.
vector = Word2VecFile(args.vectorfile) if args.release else None

# Get all talks.
print("Reading all talks ...")
audio_parser = AudioParser()
talks = audio_parser.parse(args.ctm_file)

# Collect every model directory under each root (one entry per subfolder,
# at every depth that os.walk visits).
lexical_models = [
    os.path.join(parent, child)
    for parent, child_dirs, _files in os.walk(args.lexical_model_folder)
    for child in child_dirs
]
audio_models = [
    os.path.join(parent, child)
    for parent, child_dirs, _files in os.walk(args.audio_model_folder)
    for child in child_dirs
]