# Fragment of a dialogue-system driver script. The enclosing
# parser.add_argument(...) call opens BEFORE this chunk; only its help string
# and closing paren are visible here. `parser`, `logstats`, `random`, `np`,
# and the system classes are defined/imported earlier in the file.
'Check if the utterance is true given the KB. Only work for simulated data.' )
add_scenario_arguments(parser)
add_lexicon_arguments(parser)
add_dataset_arguments(parser)
add_neural_system_arguments(parser)
add_heuristic_system_arguments(parser)
args = parser.parse_args()
logstats.init(args.stats_file)
# Seed both RNGs so runs are reproducible when a seed is supplied.
if args.random_seed:
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
schema = Schema(args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
lexicon = Lexicon(schema, args.learned_lex, stop_words=args.stop_words)
# The inverse lexicon (entity -> surface realization) is optional; `realizer`
# stays None when no inverse lexicon path is given.
if args.inverse_lexicon:
    realizer = InverseLexicon(schema, args.inverse_lexicon)
else:
    realizer = None
# Default both example caps to the full scenario set when unspecified.
if args.train_max_examples is None:
    args.train_max_examples = scenario_db.size
if args.test_max_examples is None:
    args.test_max_examples = scenario_db.size
# Factory: map a system name to a constructed dialogue system.
def get_system(name):
    if name == 'simple':
        return SimpleSystem(lexicon, realizer=realizer)
    elif name == 'heuristic':
        # Fragment ends here: the body of this branch (and any further
        # branches of get_system) continues past the visible chunk.
# Fragment of an inverse-lexicon data-extraction script. A
# parser.add_argument(...) call opens BEFORE this chunk; only its trailing
# `type=str)` is visible here. `parser`, `json`, `Schema`, and `Lexicon` are
# imported/defined earlier in the file.
type=str)
parser.add_argument("--transcripts", help="Json file of all transcripts collected")
parser.add_argument("--output", help="Output path")
add_lexicon_arguments(parser)
args = parser.parse_args()
path = args.schema
schema = Schema(path)
# Tokenization pattern: words (optionally with apostrophes), bracketed
# &-joined tokens, and assorted escaped punctuation/special characters.
re_pattern = r"[\w*\']+|[(\w*&)]+|[\w]+|\.|\(|\)|\\|\"|\/|;|\#|\$|\%|\@|\{|\}|\:"
lexicon = Lexicon(schema, learned_lex=False, entity_ranker=None, scenarios_json=args.scenarios_json, stop_words=args.stop_words)
# Load hand-annotated examples and the full set of collected transcripts.
with open(args.annotated_examples_path, "r") as f:
    annotated_examples = json.load(f)
with open(args.transcripts, "r") as f:
    examples = json.load(f)
# Default the output file when --output is not given; `fout` is presumably
# closed after the processing loop that follows this fragment.
if not args.output:
    fout = open("inverse_lexicon_data.txt", "w")
else:
    fout = open(args.output, 'w')
# Process annotated examples
# Chunk of an evaluation-analysis script. `args`, `raw_eval`,
# `question_scores`, `read_eval`, `hist`, `summarize`, `analyze`, `visualize`,
# `read_json`, and `write_json` are defined/imported earlier in the file.

# Load every chat transcript and index it by uuid for lookup below.
raw_chats = read_json(args.dialogue_transcripts)
uuid_to_chat = {chat['uuid']: chat for chat in raw_chats}
schema = Schema(args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
# NOTE(review): if `filter` here is the builtin, `raw_eval` would be used as
# a predicate over chat uuids, which looks wrong -- presumably a project
# helper shadows the builtin earlier in the file; confirm.
dialogue_ids = filter(raw_eval, uuid_to_chat)
# Accumulate per-question scores over every evaluation record, restricted to
# the selected dialogue ids.
for eval_ in raw_eval:
    read_eval(eval_, question_scores, mask=dialogue_ids)
# Optional outputs, each gated by a CLI flag.
if args.hist:
    hist(question_scores, args.outdir, partner=args.partner)
if args.summary:
    summary = summarize(question_scores)
    write_json(summary, args.stats)
if args.analyze:
    schema = Schema(args.schema_path)
    lexicon = Lexicon(schema, False, scenarios_json=args.scenarios_path, stop_words=args.stop_words)
    preprocessor = Preprocessor(schema, lexicon, 'canonical', 'canonical', 'canonical')
    analyze(question_scores, uuid_to_chat, preprocessor)
# Visualize
if args.html_output:
    visualize(args.viewer_mode, args.html_output, question_scores, uuid_to_chat)
__author__ = 'anushabala'
from argparse import ArgumentParser
from src.basic.scenario_db import ScenarioDB, add_scenario_arguments
from src.basic.schema import Schema
# NOTE(review): Lexicon was used below but never imported (NameError at
# runtime); assuming the project-standard location -- confirm against the
# sibling scripts in this package.
from src.basic.lexicon import Lexicon
import json

from get_data_statistics import add_statistics_arguments, compute_statistics
from visualize_data import add_visualization_arguments, visualize_transcripts

if __name__ == "__main__":
    # Entry point: load the schema, scenario DB, and collected transcripts,
    # then render the transcript visualization and compute dataset statistics
    # for the chosen domain.
    parser = ArgumentParser()
    add_scenario_arguments(parser)
    add_statistics_arguments(parser)
    add_visualization_arguments(parser)
    parser.add_argument('--transcripts', type=str, default='transcripts.json',
                        help='Path to directory containing transcripts')
    parser.add_argument('--domain', type=str, choices=['MutualFriends', 'Matchmaking'])
    args = parser.parse_args()

    schema = Schema(args.schema_path, args.domain)
    # Bug fix: the original called an undefined name `read_json`; load the
    # scenarios JSON directly with the already-imported json module, and close
    # the file deterministically.
    with open(args.scenarios_path, 'r') as f:
        scenario_db = ScenarioDB.from_dict(schema, json.load(f))
    # Bug fix: the original leaked the file handle via json.load(open(...)).
    with open(args.transcripts, 'r') as f:
        transcripts = json.load(f)
    lexicon = Lexicon(schema, False, scenarios_json=args.scenarios_path)

    visualize_transcripts(args, scenario_db, transcripts)
    compute_statistics(args, lexicon, schema, scenario_db, transcripts)