Exemplo n.º 1
0
def init(path):
    """Remember the stats file location and load any existing stats.

    Falls back to an empty dict when the file is missing or unparsable
    (best-effort load; callers always get a usable STATS mapping).
    """
    global stats_path, STATS
    stats_path = path
    try:
        loaded = read_json(stats_path)
    except Exception:
        loaded = {}
    STATS = loaded
Exemplo n.º 2
0
    def __init__(self, schema, lexicon, model_path, fact_check, decoding, timed_session=False, consecutive_entity=True, realizer=None):
        """Load a trained neural dialogue model from `model_path` and build
        the runtime environment (TF session, vocab mappings, preprocessor)
        this system uses to generate utterances.

        fact_check enables a FactEvaluator on generated utterances;
        decoding overrides the decoding config saved with the model.
        NOTE(review): uses Python 2 print statements and the deprecated
        tf.initialize_all_variables — targets an old TensorFlow release.
        """
        super(NeuralSystem, self).__init__()
        self.schema = schema
        self.lexicon = lexicon
        self.timed_session = timed_session
        self.consecutive_entity = consecutive_entity

        # Load arguments
        args_path = os.path.join(model_path, 'config.json')
        config = read_json(args_path)
        config['batch_size'] = 1
        config['gpu'] = 0  # Don't need GPU for batch_size=1
        config['decoding'] = decoding
        args = argparse.Namespace(**config)

        mappings_path = os.path.join(model_path, 'vocab.pkl')
        mappings = read_pickle(mappings_path)
        vocab = mappings['vocab']

        # TODO: different models have the same key now
        args.dropout = 0  # inference only: disable dropout regardless of training config
        logstats.add_args('model_args', args)
        model = build_model(schema, mappings, args)

        # Tensorflow config
        if args.gpu == 0:
            print 'GPU is disabled'
            config = tf.ConfigProto(device_count = {'GPU': 0})
        else:
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = 0.5, allow_growth=True)
            config = tf.ConfigProto(device_count = {'GPU': 1}, gpu_options=gpu_options)

        # NOTE: need to close the session when done
        tf_session = tf.Session(config=config)
        tf.initialize_all_variables().run(session=tf_session)

        # Load TF model parameters
        # Restores from the '-best' checkpoint directory (best validation model).
        ckpt = tf.train.get_checkpoint_state(model_path+'-best')
        assert ckpt, 'No checkpoint found'
        assert ckpt.model_checkpoint_path, 'No model path found in checkpoint'
        saver = tf.train.Saver()
        saver.restore(tf_session, ckpt.model_checkpoint_path)

        self.model_name = args.model
        if self.model_name == 'attn-copy-encdec':
            # Copy models decode entities in graph form.
            args.entity_target_form = 'graph'
            copy = True
        else:
            copy = False
        preprocessor = Preprocessor(schema, lexicon, args.entity_encoding_form, args.entity_decoding_form, args.entity_target_form, args.prepend)
        textint_map = TextIntMap(vocab, mappings['entity'], preprocessor)

        # Bundle everything a generation session needs into one named tuple.
        Env = namedtuple('Env', ['model', 'tf_session', 'preprocessor', 'vocab', 'copy', 'textint_map', 'stop_symbol', 'remove_symbols', 'max_len', 'evaluator', 'prepend', 'consecutive_entity', 'realizer'])
        self.env = Env(model, tf_session, preprocessor, mappings['vocab'], copy, textint_map, stop_symbol=vocab.to_ind(markers.EOS), remove_symbols=map(vocab.to_ind, (markers.EOS, markers.PAD)), max_len=20, evaluator=FactEvaluator() if fact_check else None, prepend=args.prepend, consecutive_entity=self.consecutive_entity, realizer=realizer)
Exemplo n.º 3
0
    help=
    'Check if the utterance is true given the KB. Only work for simulated data.'
)
# Register CLI options contributed by each subsystem.
add_scenario_arguments(parser)
add_lexicon_arguments(parser)
add_dataset_arguments(parser)
add_neural_system_arguments(parser)
add_heuristic_system_arguments(parser)
args = parser.parse_args()
logstats.init(args.stats_file)
# Seed both RNGs for reproducibility when a seed is given.
# NOTE(review): a seed of 0 is falsy and would be silently ignored here.
if args.random_seed:
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)

schema = Schema(args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
lexicon = Lexicon(schema, args.learned_lex, stop_words=args.stop_words)
# Optional inverse lexicon used as a realizer (entity -> surface form).
if args.inverse_lexicon:
    realizer = InverseLexicon(schema, args.inverse_lexicon)
else:
    realizer = None

# Default the per-split example caps to the full scenario set.
if args.train_max_examples is None:
    args.train_max_examples = scenario_db.size
if args.test_max_examples is None:
    args.test_max_examples = scenario_db.size


def get_system(name):
    """Return a dialogue system instance for the given system name.

    NOTE(review): only the 'simple' branch is visible in this excerpt;
    further system names are presumably handled in lines below this view.
    """
    if name == 'simple':
        return SimpleSystem(lexicon, realizer=realizer)
Exemplo n.º 4
0
'''
Extract the scenario of every chat in a transcript file and write them
out as a ScenarioDB json file.
'''

import sys
import argparse
from src.basic.util import read_json, write_json
from src.basic.scenario_db import Scenario, ScenarioDB
from src.basic.schema import Schema

# Input chats, output scenarios file, and the schema they conform to.
parser = argparse.ArgumentParser()
for flag in ('--chats', '--scenarios', '--schema-path'):
    parser.add_argument(flag)
args = parser.parse_args()

chats = read_json(args.chats)
schema = Schema(args.schema_path)
scenarios = [Scenario.from_dict(schema, chat['scenario']) for chat in chats]
write_json(ScenarioDB(scenarios).to_dict(), args.scenarios)
Exemplo n.º 5
0
                        action='store_true',
                        help='Output html files')
    parser.add_argument('--outdir', default='.', help='Output dir')
    parser.add_argument('--stats',
                        default='stats.json',
                        help='Path to stats file')
    parser.add_argument('--partner',
                        default=False,
                        action='store_true',
                        help='Whether this is from partner survey')
    add_scenario_arguments(parser)
    add_lexicon_arguments(parser)
    add_visualization_arguments(parser)
    args = parser.parse_args()

    # Load survey evaluations and the dialogues they refer to.
    raw_eval = [read_json(trans) for trans in args.eval_transcripts]
    question_scores = defaultdict(lambda: defaultdict(list))
    raw_chats = read_json(args.dialogue_transcripts)
    uuid_to_chat = {chat['uuid']: chat for chat in raw_chats}
    schema = Schema(args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
    # NOTE(review): `filter` is called with (evals, mapping) — this must be a
    # project helper shadowing the builtin `filter`; confirm its definition.
    dialogue_ids = filter(raw_eval, uuid_to_chat)

    # Aggregate per-question scores, restricted to the masked dialogue ids.
    for eval_ in raw_eval:
        read_eval(eval_, question_scores, mask=dialogue_ids)

    if args.hist:
        hist(question_scores, args.outdir, partner=args.partner)

    if args.summary:
        summary = summarize(question_scores)
Exemplo n.º 6
0
parser = ArgumentParser()
add_scenario_arguments(parser)
add_lexicon_arguments(parser)
parser.add_argument('--transcripts',
                    type=str,
                    default='transcripts.json',
                    help='Path to directory containing transcripts')
parser.add_argument('--eval-transcripts',
                    type=str,
                    default='transcripts.json',
                    help='Path to directory containing transcripts')

parsed_args = parser.parse_args()
schema = Schema(parsed_args.schema_path)
scenario_db = ScenarioDB.from_dict(schema,
                                   read_json(parsed_args.scenarios_path))
transcripts = read_json(parsed_args.transcripts)
eval_transcripts = read_json(parsed_args.eval_transcripts)
# learned_lex=False: use the rule-based lexicon rather than a learned one.
lexicon = Lexicon(schema,
                  False,
                  scenarios_json=parsed_args.scenarios_path,
                  stop_words=parsed_args.stop_words)
# Canonical entity form for encoding, decoding and targets; prepend=False.
preprocessor = Preprocessor(schema, lexicon, 'canonical', 'canonical',
                            'canonical', False)


def compute_statistics(chats):
    """Tally speech-act counts over (agent, raw_example) chat pairs.

    NOTE(review): this function is truncated in the excerpt — its body
    continues beyond the visible lines.
    """
    speech_act_summary_map = defaultdict(int)
    total = 0.
    for agent, raw in chats:
        ex = Example.from_dict(scenario_db, raw)
Exemplo n.º 7
0
if __name__ == "__main__":
    # CLI: scenario/visualization options plus the chat and survey inputs.
    parser = ArgumentParser()
    add_scenario_arguments(parser)
    add_visualization_arguments(parser)
    parser.add_argument('--transcripts', type=str,
                        default='transcripts.json',
                        help='Path to json file containing chats')
    parser.add_argument('--survey_file', type=str,
                        default=None,
                        help='Path to json file containing survey')
    args = parser.parse_args()

    schema = Schema(args.schema_path)
    transcripts = read_json(args.transcripts)
    # Element [1] of the survey json holds the responses —
    # presumably [0] is other metadata; confirm the survey file format.
    survey = read_json(args.survey_file)[1]
    html_output = args.html_output

    if args.viewer_mode:
        # External js and css
        write_viewer_data(html_output, transcripts, responses=survey)
    else:
        # Inline style
        visualize_transcripts(html_output, transcripts, responses=survey,
                              css_file=args.css_file)
Exemplo n.º 8
0
                    help='Transciprts paths',
                    nargs='*',
                    default=[])
parser.add_argument('--train-frac',
                    help='Fraction of training examples',
                    type=float,
                    default=0.6)
parser.add_argument('--test-frac',
                    help='Fraction of test examples',
                    type=float,
                    default=0.2)
parser.add_argument('--dev-frac',
                    help='Fraction of dev examples',
                    type=float,
                    default=0.2)
parser.add_argument('--output-path', help='Output path for splits')
args = parser.parse_args()

# Fixed seed so the same examples always land in the same split.
np.random.seed(0)
# Fold indices 0/1/2 = train/dev/test (order fixed by the p= vector below).
json_data = ([], [], [])
for path in args.example_paths:
    examples = read_json(path)
    folds = np.random.choice(
        3, len(examples), p=[args.train_frac, args.dev_frac, args.test_frac])
    for ex, fold in izip(examples, folds):
        json_data[fold].append(ex)

# Write each non-empty fold to '<output-path><fold>.json'.
for fold, dataset in izip(('train', 'dev', 'test'), json_data):
    if len(dataset) > 0:
        write_json(dataset, '%s%s.json' % (args.output_path, fold))
Exemplo n.º 9
0
    add_graph_arguments(parser)
    add_graph_embed_arguments(parser)
    add_learner_arguments(parser)
    args = parser.parse_args()

    random.seed(args.random_seed)
    logstats.init(args.stats_file)
    logstats.add_args('config', args)

    # Save or load models
    if args.init_from:
        start = time.time()
        print 'Load model (config, vocab, checkpoint) from', args.init_from
        config_path = os.path.join(args.init_from, 'config.json')
        vocab_path = os.path.join(args.init_from, 'vocab.pkl')
        saved_config = read_json(config_path)
        # Decoding method and batch size come from the command line,
        # overriding whatever was saved with the model.
        saved_config['decoding'] = args.decoding
        saved_config['batch_size'] = args.batch_size
        model_args = argparse.Namespace(**saved_config)

        # Checkpoint
        # At test time with --best, load from the best-validation directory.
        if args.test and args.best:
            ckpt = tf.train.get_checkpoint_state(args.init_from + '-best')
        else:
            ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, 'No checkpoint found'
        assert ckpt.model_checkpoint_path, 'No model path found in checkpoint'

        # Load vocab
        mappings = read_pickle(vocab_path)
        print 'Done [%fs]' % (time.time() - start)
Exemplo n.º 10
0
        plt.tight_layout()
        plt.savefig(os.path.join(args.output, '%d-utterance.pdf' % n))

if args.attr:
    #stats_files = ['%s_stats.json' % x for x in args.stats]
    stats_files = args.stats
    ncol = 1
    nrow = len(stats_files)
    #stats = ['max_count', 'max_min_ratio', 'max_count_normalize', 'max_min_ratio_normalize']
    stats = ['max_min_ratio_normalize', 'entity_count']
    stat_names = ['Skewness of the first mentioned attribute', 'Relative count of the first mentioned entity']
    # One figure per statistic: a column of histograms (one row per stats
    # file) overlaying first-mention data on the all-mention background.
    # NOTE(review): iterating `axes` assumes nrow > 1 (with one file,
    # plt.subplots returns a single Axes, not an array) — confirm inputs.
    for stat, stat_name in izip(stats, stat_names):
        plt.cla()
        fig, axes = plt.subplots(nrows=nrow, ncols=ncol, sharex=True, sharey=True)
        for i, (ax, stat_file, name) in enumerate(izip(axes, stats_files, args.names)):
            all_stats = read_json(stat_file)
            data = all_stats['entity_mention']['first'][stat]
            background = all_stats['entity_mention']['all'][stat]
            print name, stat_name, np.mean(data)
            ax.hist(background, 30, edgecolor='g', normed=True, alpha=0.7, label='BG', fill=False, linewidth=3, histtype='step')
            ax.hist(data, 30, edgecolor='r', normed=True, alpha=0.7, label='First', fill=False, linewidth=3, histtype='step')
            if i == 0:
                ax.legend(ncol=2, bbox_to_anchor=(1,1.5))
            ax.set_yscale('log')
            #ax.locator_params(nbins=4, axis='y')
            ax.set_title(name, fontsize='x-large')
        # Label only the bottom subplot's x axis (axes share x).
        ax.set_xlabel(stat_name, fontsize='x-large')
        axbox = axes[0].get_position()
        plt.tight_layout()
        plt.savefig('%s/first_attr_%s.pdf' % (args.output, stat))
Exemplo n.º 11
0
                row[question] = np.mean(scores)
        metadata['data'].append(row)
    write_json(metadata, os.path.join(outdir, 'metadata.json'))

def write_viewer_data(html_output, transcripts, responses=None):
    """Write the viewer's metadata file and per-chat HTML pages.

    Creates `html_output` if it does not exist; `responses` (optional
    survey data) is forwarded to both writers.
    """
    if not os.path.exists(html_output):
        os.makedirs(html_output)
    for writer in (write_metadata, write_chat_htmls):
        writer(transcripts, html_output, responses)


if __name__ == "__main__":
    # CLI: scenario + visualization options plus the transcripts file.
    parser = ArgumentParser()
    add_scenario_arguments(parser)
    add_visualization_arguments(parser)
    parser.add_argument('--transcripts', type=str,
                        default='transcripts.json',
                        help='Path to directory containing transcripts')
    args = parser.parse_args()

    schema = Schema(args.schema_path)
    transcripts = read_json(args.transcripts)
    html_output = args.html_output

    if args.viewer_mode:
        # External js and css
        write_viewer_data(html_output, transcripts)
    else:
        # Inline style
        visualize_transcripts(html_output, transcripts, css_file=args.css_file)
Exemplo n.º 12
0
        plot_alpha_stats(strategy_stats["alpha_stats"], args.plot_alpha_stats)

    if args.plot_item_stats:
        plot_num_item_stats(strategy_stats["num_items_stats"],
                            args.plot_item_stats)
    json.dump(stats, statsfile)
    statsfile.close()


if __name__ == "__main__":
    # CLI: scenario/lexicon/statistics options plus the transcripts file.
    parser = ArgumentParser()
    add_scenario_arguments(parser)
    add_lexicon_arguments(parser)
    parser.add_argument('--transcripts',
                        type=str,
                        default='transcripts.json',
                        help='Path to directory containing transcripts')
    add_statistics_arguments(parser)

    parsed_args = parser.parse_args()
    schema = Schema(parsed_args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema,
                                       read_json(parsed_args.scenarios_path))
    # Fix: json.load(open(...)) leaked the file handle; a with-block
    # guarantees the transcripts file is closed after loading.
    with open(parsed_args.transcripts, 'r') as transcripts_file:
        transcripts = json.load(transcripts_file)
    # learned_lex=False: rule-based entity linking.
    lexicon = Lexicon(schema,
                      False,
                      scenarios_json=parsed_args.scenarios_path,
                      stop_words=parsed_args.stop_words)
    compute_statistics(parsed_args, lexicon, schema, scenario_db, transcripts)