示例#1
0
    'Check if the utterance is true given the KB. Only work for simulated data.'
)
# Register each option group on the shared parser, then parse the command line.
add_scenario_arguments(parser)
add_lexicon_arguments(parser)
add_dataset_arguments(parser)
add_neural_system_arguments(parser)
add_heuristic_system_arguments(parser)
args = parser.parse_args()

logstats.init(args.stats_file)

# Seed both the stdlib and NumPy generators, but only for a truthy seed
# (a seed of 0 or None leaves the generators unseeded).
if args.random_seed:
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

# Core resources shared by the systems constructed below.
schema = Schema(args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
lexicon = Lexicon(schema, args.learned_lex, stop_words=args.stop_words)

# The realizer is optional: build it only when an inverse lexicon was given.
realizer = (InverseLexicon(schema, args.inverse_lexicon)
            if args.inverse_lexicon else None)

# Unspecified example limits fall back to the size of the scenario database.
if args.train_max_examples is None:
    args.train_max_examples = scenario_db.size
if args.test_max_examples is None:
    args.test_max_examples = scenario_db.size

def get_system(name):
    if name == 'simple':
        return SimpleSystem(lexicon, realizer=realizer)
    elif name == 'heuristic':
示例#2
0
                        type=str)
    parser.add_argument(
        "--transcripts",
        help="Json file of all transcripts collected",
    )
    parser.add_argument("--output", help="Output path")
    add_lexicon_arguments(parser)

    args = parser.parse_args()

    # Load the schema from the path given on the command line.
    path = args.schema
    schema = Schema(path)

    # Tokenisation pattern: word-like runs (optionally with apostrophes,
    # '*', '&' or parentheses) plus a set of standalone punctuation symbols.
    re_pattern = r"[\w*\']+|[(\w*&)]+|[\w]+|\.|\(|\)|\\|\"|\/|;|\#|\$|\%|\@|\{|\}|\:"

    # Lexicon built without a learned model and without an entity ranker.
    lexicon = Lexicon(
        schema,
        learned_lex=False,
        entity_ranker=None,
        scenarios_json=args.scenarios_json,
        stop_words=args.stop_words,
    )

    # Read both JSON inputs: the annotated examples and the raw transcripts.
    with open(args.annotated_examples_path, "r") as f:
        annotated_examples = json.load(f)
    with open(args.transcripts, "r") as f:
        examples = json.load(f)

    # Write to the requested output path, or to a default file name when no
    # (or an empty) --output value was supplied.
    fout = open(args.output or "inverse_lexicon_data.txt", "w")

    # Process annotated examples
示例#3
0
    # Load the raw chats and index them by uuid for lookup.
    raw_chats = read_json(args.dialogue_transcripts)
    uuid_to_chat = {chat['uuid']: chat for chat in raw_chats}
    schema = Schema(args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
    # NOTE(review): `filter` is called with (evaluations, chats), which does
    # not match the builtin's (function, iterable) signature -- presumably a
    # project helper shadows the builtin here; confirm against its definition.
    dialogue_ids = filter(raw_eval, uuid_to_chat)

    # Accumulate scores from every evaluation, masked by the selected ids.
    for eval_ in raw_eval:
        read_eval(eval_, question_scores, mask=dialogue_ids)

    # Optional outputs, each enabled by its own command-line flag.
    if args.hist:
        hist(question_scores, args.outdir, partner=args.partner)

    if args.summary:
        summary = summarize(question_scores)
        write_json(summary, args.stats)

    if args.analyze:
        # Reuse the schema loaded above instead of reading the same
        # schema file from disk a second time.
        lexicon = Lexicon(schema,
                          False,
                          scenarios_json=args.scenarios_path,
                          stop_words=args.stop_words)
        preprocessor = Preprocessor(schema, lexicon, 'canonical', 'canonical',
                                    'canonical')
        analyze(question_scores, uuid_to_chat, preprocessor)

    # Visualize
    if args.html_output:
        visualize(args.viewer_mode, args.html_output, question_scores,
                  uuid_to_chat)
示例#4
0
__author__ = 'anushabala'

from argparse import ArgumentParser
import json

from src.basic.lexicon import Lexicon
from src.basic.scenario_db import ScenarioDB, add_scenario_arguments
from src.basic.schema import Schema
from get_data_statistics import add_statistics_arguments, compute_statistics
from visualize_data import add_visualization_arguments, visualize_transcripts

if __name__ == "__main__":
    # Script entry point: parse options, load the scenarios and transcripts,
    # then render the transcripts and compute statistics over them.
    parser = ArgumentParser()
    add_scenario_arguments(parser)
    add_statistics_arguments(parser)
    add_visualization_arguments(parser)
    parser.add_argument('--transcripts',
                        type=str,
                        default='transcripts.json',
                        help='Path to JSON file containing transcripts')
    parser.add_argument('--domain',
                        type=str,
                        choices=['MutualFriends', 'Matchmaking'])

    args = parser.parse_args()
    schema = Schema(args.schema_path, args.domain)

    # Read both JSON inputs through context managers so the file handles are
    # closed promptly; the stdlib json module is used directly because no
    # read_json helper is imported in this file.
    with open(args.scenarios_path, 'r') as scenarios_file:
        raw_scenarios = json.load(scenarios_file)
    with open(args.transcripts, 'r') as transcripts_file:
        transcripts = json.load(transcripts_file)

    scenario_db = ScenarioDB.from_dict(schema, raw_scenarios)
    lexicon = Lexicon(schema, False, scenarios_json=args.scenarios_path)

    visualize_transcripts(args, scenario_db, transcripts)
    compute_statistics(args, lexicon, schema, scenario_db, transcripts)