Example #1
def init_static_dialog_agent(args) :
    print "reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    load_parser_from_file = False
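    # args[4] == 'true' (case-insensitive) requests loading a previously saved parser instead of training one.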
    if len(args) > 4 :
        if args[4].lower() == 'true' :
            load_parser_from_file = True
            
    if load_parser_from_file :
        parser = load_model('static_parser')
        grounder.parser = parser
        grounder.ontology = parser.ontology
    else :
        print "instantiating Parser"
        parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10, safety=True)

    print "instantiating Generator"
    generator = Generator.Generator(ont, lex, learner, parser, beam_width=sys.maxint, safety=True)

    print "instantiating DialogAgent"
    static_policy = StaticDialogPolicy.StaticDialogPolicy()
    A = StaticDialogAgent(parser, generator, grounder, static_policy, None, None)

    if not load_parser_from_file :
        print "reading in training data"
        D = A.read_in_utterance_action_pairs(args[3])

        if len(args) > 4 and args[4] == "both":
            print "training parser and generator jointly from actions"
            converged = A.jointly_train_parser_and_generator_from_utterance_action_pairs(
                D, epochs=10, parse_beam=30, generator_beam=10)
        else:
            print "training parser from actions"
            converged = A.train_parser_from_utterance_action_pairs(
                D, epochs=10, parse_beam=30)

        print "theta: "+str(parser.learner.theta)
        save_model(parser, 'static_parser')
    
    return A
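
A minimal, hypothetical driver for the initializer above (an assumption, not part of the original example), showing how the positional arguments line up:

# Hypothetical usage sketch; argument layout assumed from the function body:
#   args[1] = ontology file, args[2] = lexicon file, args[3] = utterance/action pairs,
#   args[4] = 'true' to load a saved parser (or 'both' to train parser and generator jointly).
if __name__ == "__main__":
    agent = init_static_dialog_agent(sys.argv)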
Example #2
def init_pomdp_dialog_agent(args) :
    print "Reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "Reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "Instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "Instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "Instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    load_models_from_file = False
    if len(args) > 4 :
        if args[4].lower() == 'true' :
            load_models_from_file = True

    if load_models_from_file :
        parser = load_model('pomdp_parser')
        grounder.parser = parser
        grounder.ontology = parser.ontology
    else :
        print "Instantiating Parser"
        parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10)

    print "Instantiating DialogAgent"
    if load_models_from_file :
        agent = PomdpDialogAgent(parser, grounder, None, None, parse_depth=10, load_policy_from_file=True)
    else :
        agent = PomdpDialogAgent(parser, grounder, None, None, parse_depth=10, load_policy_from_file=False)

    if not load_models_from_file :
        print "reading in data and training parser from actions"
        D = agent.read_in_utterance_action_pairs(args[3])
        converged = agent.train_parser_from_utterance_action_pairs(D, epochs=10, parse_beam=30)
        print "theta: "+str(parser.learner.theta)
        save_model(parser, 'pomdp_parser')
        #print 'Parser ontology : ', parser.ontology.preds

    return agent
Example #3
    def create_pomdp_dialog_agent(self, parser_file=None):
        ont = copy.deepcopy(self.ont)
        lex = copy.deepcopy(self.lex)

        print "instantiating KBGrounder"
        grounder = KBGrounder.KBGrounder(ont)

        if self.load_models_from_file:
            parser = load_model(parser_file)
        else:
            parser = CKYParser.CKYParser(ont, lex, use_language_model=True)
            # Set parser hyperparams to best known values for training
            parser.max_multiword_expression = 2  # max span of a multi-word expression to be considered during tokenization
            parser.max_new_senses_per_utterance = 3  # max number of new word senses that can be induced on a training example
            parser.max_cky_trees_per_token_sequence_beam = 100  # for tokenization of an utterance, max cky trees considered
            parser.max_hypothesis_categories_for_unknown_token_beam = 3  # for unknown token, max syntax categories tried
            # Train parser
            d = parser.read_in_paired_utterance_semantics(
                self.parser_train_file)
            converged = parser.train_learner_on_semantic_forms(
                d, 10, reranker_beam=10)
            save_model(parser, parser_file)

        # Set parser hyperparams to best known values for test time
        parser.max_multiword_expression = 2  # max span of a multi-word expression to be considered during tokenization
        parser.max_new_senses_per_utterance = 2  # max number of new word senses that can be induced on a training example
        parser.max_cky_trees_per_token_sequence_beam = 1000  # for tokenization of an utterance, max cky trees considered
        parser.max_hypothesis_categories_for_unknown_token_beam = 2  # for unknown token, max syntax categories tried
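        # Compared to training, test time widens the CKY tree beam but allows fewer new senses and unknown-token categories.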

        grounder.parser = parser
        grounder.ontology = parser.ontology

        print "Instantiating DialogAgent"
        if self.load_models_from_file:
            policy = load_model('ktdq_policy')
            policy.untrained = False
            policy.training = False
        else:
            knowledge = Knowledge()
            policy = PomdpKtdqPolicy(knowledge)
            policy.untrained = True
            policy.training = True
        agent = PomdpDialogAgent(parser, grounder, policy, None, None)
        agent.retrain_parser = False
        return agent
Example #4
def init_dialog_agent(args):
    print "Reading in Ontology"
    ont = Ontology.Ontology(args[1])
    print "predicates: " + str(ont.preds)
    print "types: " + str(ont.types)
    print "entries: " + str(ont.entries)

    print "Reading in Lexicon"
    lex = Lexicon.Lexicon(ont, args[2])
    print "surface forms: " + str(lex.surface_forms)
    print "categories: " + str(lex.categories)
    print "semantic forms: " + str(lex.semantic_forms)
    print "entries: " + str(lex.entries)

    print "Instantiating Feature Extractor"
    f_extractor = FeatureExtractor.FeatureExtractor(ont, lex)

    print "Instantiating Linear Learner"
    learner = LinearLearner.LinearLearner(ont, lex, f_extractor)

    print "Instantiating KBGrounder"
    grounder = KBGrounder.KBGrounder(ont)

    print "Instantiating Parser"
    parser = Parser.Parser(ont, lex, learner, grounder, beam_width=10)
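    # The freshly constructed parser is immediately replaced by a previously saved model below.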
    parser = load_model('parser')
    grounder.parser = parser
    grounder.ontology = parser.ontology

    print "Instantiating DialogAgent"
    agent = PomdpDialogAgent(parser, grounder, None, None)

    #print "reading in data and training parser from actions"
    #D = agent.read_in_utterance_action_pairs(args[3])
    #converged = agent.train_parser_from_utterance_action_pairs(D, epochs=10, parse_beam=30)
    #print "theta: "+str(parser.learner.theta)
    #save_model(parser, 'parser')
    #print 'Parser ontology : ', parser.ontology.preds

    return agent
Example #5
print "reading in Ontology"
ont = Ontology.Ontology(sys.argv[1])
print "predicates: " + str(ont.preds)
print "types: " + str(ont.types)
print "entries: " + str(ont.entries)

print "reading in Lexicon"
lex = Lexicon.Lexicon(ont, sys.argv[2])
print "surface forms: " + str(lex.surface_forms)
print "categories: " + str(lex.categories)
print "semantic forms: " + str(lex.semantic_forms)
print "entries: " + str(lex.entries)

print "instantiating KBGrounder"
grounder = KBGrounder.KBGrounder(ont)

#print "instantiating Parser"
#parser = CKYParser.CKYParser(ont, lex, use_language_model=True)
## Set parser hyperparams to best known values for training
#parser.max_multiword_expression = 2  # max span of a multi-word expression to be considered during tokenization
#parser.max_new_senses_per_utterance = 2  # max number of new word senses that can be induced on a training example
#parser.max_cky_trees_per_token_sequence_beam = 1000  # for tokenization of an utterance, max cky trees considered
#parser.max_hypothesis_categories_for_unknown_token_beam = 5  # for unknown token, max syntax categories tried
#parser.max_expansions_per_non_terminal = 5 # max number of backpointers stored per nonterminal per cell in CKY chart
#d = parser.read_in_paired_utterance_semantics(sys.argv[3])
#converged = parser.train_learner_on_semantic_forms(d, 10, reranker_beam=10)
#if not converged:
#    raise AssertionError("Training failed to converge to correct values.")
#save_model(parser, 'parser')
parser = load_model('parsers/parser_1000')
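# The loaded parser is presumably attached to the grounder further along in the original script,
# as in the other examples (grounder.parser = parser; grounder.ontology = parser.ontology).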
Example #6
def main():

    # Load parameters from command line.
    parser_fn = FLAGS_parser_fn
    word_embeddings_fn = FLAGS_word_embeddings_fn
    kb_static_facts_fn = FLAGS_kb_static_facts_fn
    kb_perception_source_dir = FLAGS_kb_perception_source_dir
    kb_perception_feature_dir = FLAGS_kb_perception_feature_dir
    active_test_set = [int(oidx) for oidx in FLAGS_active_test_set.split(',')]
    active_train_set = [
        int(oidx) for oidx in FLAGS_active_train_set.split(',')
    ] if FLAGS_active_train_set is not None else None
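    # Both active sets are comma-separated object ids; the active training set's labels are blinded below.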
    server_spin_time = FLAGS_server_spin_time
    cycles_per_user = FLAGS_cycles_per_user
    client_dir = FLAGS_client_dir
    log_dir = FLAGS_log_dir
    data_dir = FLAGS_data_dir
    write_classifiers = FLAGS_write_classifiers
    load_grounder = FLAGS_load_grounder
    num_dialogs = FLAGS_num_dialogs
    init_phase = FLAGS_init_phase

    # Load the parser from file.
    print "main: loading parser from file..."
    with open(parser_fn, 'rb') as f:
        p = pickle.load(f)
    p.lexicon.wv = p.lexicon.load_word_embeddings(word_embeddings_fn)
    print "main: ... done"

    # Create a new labels.pickle that erases the labels of the active training set for test purposes.
    full_annotation_fn = os.path.join(kb_perception_source_dir,
                                      'full_annotations.pickle')
    if os.path.isfile(full_annotation_fn):
        print "main: creating new labels.pickle that blinds the active training set for this test..."
        with open(full_annotation_fn, 'rb') as f:
            fa = pickle.load(f)
        with open(os.path.join(kb_perception_source_dir, 'labels.pickle'),
                  'wb') as f:
            labels = []
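            # Collect (predicate_index, object_index, label) triples, omitting objects in the active training set.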
            for oidx in fa:
                if active_train_set is None or oidx not in active_train_set:
                    for pidx in range(len(fa[oidx])):
                        labels.append((pidx, oidx, fa[oidx][pidx]))
            pickle.dump(labels, f)
        print "main: ... done"

    # Instantiate a grounder.
    grounder_fn = os.path.join(client_dir, 'grounder.pickle')
    if load_grounder != 1:
        print "main: instantiating grounder..."
        g = KBGrounder.KBGrounder(p, kb_static_facts_fn,
                                  kb_perception_source_dir,
                                  kb_perception_feature_dir, active_test_set)
        if write_classifiers:
            print "main: and writing grounder perception classifiers to file..."
            g.kb.pc.commit_changes()  # save classifiers to disk
        print "main: writing grounder to pickle..."
        with open(grounder_fn, 'wb') as f:
            pickle.dump(g, f)
        print "main: ... done"

    # Start the Server.
    print "main: instantiated server..."
    s = Server(active_train_set, grounder_fn, server_spin_time,
               cycles_per_user, client_dir, log_dir, data_dir, num_dialogs,
               init_phase)
    print "main: ... done"

    print "main: spinning server..."
    s.spin()
Example #7
def main():

    # Load parameters from command line.
    parser_fn = FLAGS_parser_fn
    kb_static_facts_fn = FLAGS_kb_static_facts_fn
    kb_perception_source_dir = FLAGS_kb_perception_source_dir
    kb_perception_feature_dir = FLAGS_kb_perception_feature_dir
    active_train_set = [int(oidx) for oidx in FLAGS_active_train_set.split(',')] \
        if FLAGS_active_train_set is not None else None
    active_test_set = [int(oidx) for oidx in FLAGS_active_test_set.split(',')]
    outfile = FLAGS_outfile

    # Create a new labels.pickle that erases the labels of the active training set for test purposes.
    full_annotation_fn = os.path.join(kb_perception_source_dir,
                                      'full_annotations.pickle')
    if os.path.isfile(full_annotation_fn):
        print "main: creating new labels.pickle that blinds the active training set for this test..."
        with open(full_annotation_fn, 'rb') as f:
            fa = pickle.load(f)
        with open(os.path.join(kb_perception_source_dir, 'labels.pickle'),
                  'wb') as f:
            labels = []
            for oidx in fa:
                if active_train_set is None or oidx not in active_train_set:
                    for pidx in range(len(fa[oidx])):
                        labels.append((pidx, oidx, fa[oidx][pidx]))
            pickle.dump(labels, f)
    with open(parser_fn, 'rb') as f:
        p = pickle.load(f)
    g = KBGrounder.KBGrounder(p, kb_static_facts_fn, kb_perception_source_dir,
                              kb_perception_feature_dir, active_test_set)

    # Start dumping HTML.
    table_format = "<table border=1px cellspacing=1px cellpadding=1px>"
    with open(outfile, 'wb') as f:

        f.write("<p><b>Train object data</b>")
        f.write(
            table_format +
            "<tr><th>predicate</th><th>positive</th><th>negative</th></tr>")
        preds = g.kb.perceptual_preds
        w = 3  # number of object images per row in the nested per-label table
        for pidx in range(len(preds)):
            f.write("<tr><td>" + preds[pidx] + "</td>")

            pairs = []
            oidx_votes = {}
            for pjdx, oidx, l in g.kb.pc.labels:
                if pjdx == pidx and oidx not in g.kb.pc.active_test_set:
                    if oidx not in oidx_votes:
                        oidx_votes[oidx] = []
                    oidx_votes[oidx].append(1 if l else -1)
            for oidx in oidx_votes:
                s = sum(oidx_votes[oidx])
                if s > 0:
                    pairs.append((oidx, 1, oidx_votes[oidx].count(1),
                                  oidx_votes[oidx].count(-1)))
                elif s < 0:
                    pairs.append((oidx, -1, oidx_votes[oidx].count(1),
                                  oidx_votes[oidx].count(-1)))

            for label in [1, -1]:
                f.write("<td>")
                c = 0
                f.write("<table><tr>")
                for oidx, l, pos_v, neg_v in pairs:
                    if l == label:
                        f.write("<td><img width=\"200px\" height=\"200px\" " +
                                "src=\"../www/images/objects/oidx_" +
                                str(oidx) + ".jpg\">")
                        f.write("<br/>(" + str(pos_v) + ", " + str(neg_v) +
                                ")</td>")
                        c += 1
                        if c == w:
                            f.write("</tr><tr>")
                            c = 0
                f.write("</tr></table>")
                f.write("</td>")
            f.write("</tr>")
        f.write("</table></p>")

        f.write("<hr>")
        f.write("<p><b>Test object results</b>")
        f.write(table_format + "<tr><th>predicate</th>")
        for idx in range(len(active_test_set)):
            f.write("<th>" + str(idx + 1) + "</th>")
        f.write("</tr>")

        # Run each trained classifier on each object in the test set.
        for pidx in range(len(preds)):
            f.write("<tr><td>" + preds[pidx] + "</td>")
            if g.kb.pc.classifiers[pidx] is not None:
                oidx_pos = {}
                for oidx in active_test_set:
                    q = (preds[pidx], "oidx_" + str(oidx))
                    pos, neg = g.kb.query(q)
                    oidx_pos[oidx] = pos
                s = sum([oidx_pos[oidx] for oidx in oidx_pos.keys()])
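                # Normalize the positive responses into a distribution over the test objects for display.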
                oidx_d = {oidx: oidx_pos[oidx] / s for oidx in oidx_pos.keys()}
                for oidx, pos in sorted(oidx_pos.items(),
                                        key=operator.itemgetter(1),
                                        reverse=True):
                    f.write("<td><img width=\"200px\" height=\"200px\" " +
                            "src=\"../www/images/objects/oidx_" + str(oidx) +
                            ".jpg\"><br/>" + str(pos) + "<br/>" +
                            str(oidx_d[oidx]) + "</td>")
            else:
                for _ in range(len(active_test_set)):
                    f.write("<td>&nbsp;</td>")
            f.write("</tr>")
        f.write("</table></p>")
Example #8
def main():

    # Load parameters from command line.
    parser_fn = FLAGS_parser_fn
    word_embeddings_fn = FLAGS_word_embeddings_fn
    io_type = FLAGS_io_type
    grounder_fn = FLAGS_grounder_fn
    active_train_set = [
        int(oidx) for oidx in FLAGS_active_train_set.split(',')
    ] if FLAGS_active_train_set is not None else None
    kb_static_facts_fn = None
    kb_perception_source_dir = None
    kb_perception_feature_dir = None
    active_test_set = None
    if grounder_fn is None:
        kb_static_facts_fn = FLAGS_kb_static_facts_fn
        kb_perception_source_dir = FLAGS_kb_perception_source_dir
        kb_perception_feature_dir = FLAGS_kb_perception_feature_dir
        active_test_set = [
            int(oidx) for oidx in FLAGS_active_test_set.split(',')
        ]
    write_classifiers = FLAGS_write_classifiers
    uid = FLAGS_uid
    data_dir = FLAGS_data_dir
    client_dir = FLAGS_client_dir
    spin_time = FLAGS_spin_time
    num_dialogs = FLAGS_num_dialogs
    init_phase = FLAGS_init_phase
    max_syn_qs = FLAGS_max_syn_qs
    max_opp_qs = FLAGS_max_opp_qs
    image_path = FLAGS_image_path
    bbc_demo = FLAGS_bbc_demo
    no_clarify = FLAGS_no_clarify.split(
        ',') if FLAGS_no_clarify is not None else None
    assert io_type == 'keyboard' or io_type == 'server' or io_type == 'robot'
    assert io_type != 'server' or (uid is not None and client_dir is not None
                                   and data_dir is not None)
    assert io_type != 'robot' or image_path is not None

    if grounder_fn is None:

        # Load the parser from file.
        print "main: loading parser from file..."
        with open(parser_fn, 'rb') as f:
            p = pickle.load(f)
        p.lexicon.wv = p.lexicon.load_word_embeddings(word_embeddings_fn)
        print "main: ... done"

        # Create a new labels.pickle that erases the labels of the active training set for test purposes.
        full_annotation_fn = os.path.join(kb_perception_source_dir,
                                          'full_annotations.pickle')
        if os.path.isfile(full_annotation_fn):
            print "main: creating new labels.pickle that blinds the active training set for this test..."
            with open(full_annotation_fn, 'rb') as f:
                fa = pickle.load(f)
            with open(os.path.join(kb_perception_source_dir, 'labels.pickle'),
                      'wb') as f:
                labels = []
                for oidx in fa:
                    if active_train_set is None or oidx not in active_train_set:
                        for pidx in range(len(fa[oidx])):
                            labels.append((pidx, oidx, fa[oidx][pidx]))
                pickle.dump(labels, f)
            print "main: ... done"

        # Instantiate a grounder.
        print "main: instantiating grounder..."
        g = KBGrounder.KBGrounder(p, kb_static_facts_fn,
                                  kb_perception_source_dir,
                                  kb_perception_feature_dir, active_test_set)
        if write_classifiers:
            print "main: and writing grounder perception classifiers to file..."
            g.kb.pc.commit_changes()  # save classifiers to disk
        print "main: ... done"

    else:
        # Load a grounder from file
        print "main: loading grounder from file..."
        with open(grounder_fn, 'rb') as f:
            g = pickle.load(f)
        print "main: ... done"

        # Grab a reference to the parser from the loaded grounder.
        p = g.parser

    # Instantiate an input/output
    print "main: instantiating IO..."
    use_shorter_utterances = False
    if io_type == 'keyboard':
        io = IO.KeyboardIO()
    elif io_type == 'server':
        io = IO.SeverIO(uid, client_dir, spin_time=spin_time)
    elif io_type == 'robot':  # includes some hard-coded expectations like 2 tables, 8 training objects
        if len(active_train_set) == 8:  # All are train objects
            table_oidxs = {
                1: active_train_set[0:4],
                2: active_train_set[4:8],
                3: None
            }
        else:  # Table 1 test objects, Table 2 train objects
            table_oidxs = {1: active_test_set[:], 2: active_train_set[:]}
        rospy.init_node('phm_node')
        print "WARNING: ensure robot is facing Table 2 on startup!"
        io = IO.RobotIO(table_oidxs, 2, image_path)
        use_shorter_utterances = True
    else:
        io = None  # won't be executed due to asserts
    print "main: ... done"

    # Normal operation.
    if init_phase == 0 and bbc_demo != 1:
        # Instantiate an Agent.
        print "main: instantiating Agent..."
        a = Agent.Agent(p,
                        g,
                        io,
                        active_train_set,
                        no_clarify=no_clarify,
                        use_shorter_utterances=use_shorter_utterances,
                        word_neighbors_to_consider_as_synonyms=max_syn_qs,
                        max_perception_subdialog_qs=max_opp_qs)
        print "main: ... done"

        # Start a dialog.
        perception_labels_requested = []
        action_confirmed_per_dialog = []
        utterances_by_role_per_dialog = []
        for _ in range(num_dialogs):
            print "main: running command dialog..."
            action_confirmed, user_utterances_by_role = a.start_action_dialog(
                perception_labels_requested=perception_labels_requested)
            action_confirmed_per_dialog.append(action_confirmed)
            utterances_by_role_per_dialog.append(user_utterances_by_role)
            print "main: ... done; got action " + str(action_confirmed)

            # Write out new information gleaned from this user after every dialog.
            if uid is not None:  # DEBUG
                print "main: writing new information from dialog(s) to file..."
                fn = os.path.join(data_dir, uid + ".pickle")
                d = [
                    action_confirmed_per_dialog, utterances_by_role_per_dialog,
                    a.new_perceptual_labels, a.perceptual_pred_synonymy
                ]
                with open(fn, 'wb') as f:
                    pickle.dump(d, f)
                print "main: ... done; wrote data d = " + str(d)

    # Ask for pointing commands.
    elif bbc_demo == 1:
        print "main: instantiating Agent..."
        a = Agent.Agent(p,
                        g,
                        io,
                        active_train_set,
                        no_clarify=no_clarify,
                        use_shorter_utterances=use_shorter_utterances,
                        word_neighbors_to_consider_as_synonyms=max_syn_qs,
                        max_perception_subdialog_qs=max_opp_qs)
        print "main: ... done"

        print "main: updating lexicon with 'rattling'"
        a.add_new_perceptual_lexical_entries('rattling', False, None)
        a.parser.type_raise_bare_nouns()
        a.parser.theta.update_probabilities()
        print "main: ... done"

        print "main: training 'rattling' classifier"
        g.kb.pc.update_classifiers(['rattling'], [], [], [])
        perception_pidx = g.kb.pc.predicates.index('rattling')
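        # Seed the new 'rattling' classifier with hard-coded labels for eight training objects.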
        upidxs = [perception_pidx] * 8
        uoidxs = [5, 14, 4, 27, 0, 30, 1, 31]
        ulabels = [0, 1, 0, 0, 0, 0, 1, 1]
        g.kb.pc.update_classifiers([], upidxs, uoidxs, ulabels)
        print "main: ... done"

        print "main: starting bbc phase dialog..."
        io.say_to_user("What should I do?")
        cmd = io.get_from_user()
        tokens = cmd.split()
        if tokens[0] in ('point', 'points') and tokens[1] in ('to', '2'):
            gps, _ = a.parse_and_ground_utterance(' '.join(tokens[2:]))
            for grounding, conf in gps:
                selected_oidx = a.parser.ontology.preds[grounding.idx]
                print selected_oidx, conf  # DEBUG
            _, top_conf = gps[0]
            for grounding, conf in gps:
                if conf == top_conf:
                    selected_oidx = a.parser.ontology.preds[grounding.idx]
                    oidx = int(selected_oidx.split('_')[1])  # e.g. 'oidx_1' -> 1
                    ttid = None
                    for tid in a.io.table_oidxs:
                        if a.io.table_oidxs[tid] is not None and oidx in a.io.table_oidxs[tid]:
                            ttid = tid
                    if ttid is not None:
                        a.io.face_table(ttid)
                        a.io.point(a.io.table_oidxs[ttid].index(oidx))
                    a.io.point(-1)

        print "main: ... done"

    # Just ask the user for a few rephrases of the command.
    else:
        print "main: starting init phase dialog..."
        for nd in range(num_dialogs):
            io.say_to_user("What should I do?")
            _ = io.get_from_user()
            for ip in range(init_phase - 1):
                io.say_to_user(
                    "What's another way you could phrase that command?")
                _ = io.get_from_user()
            io.perform_action({'action': 'init_phase'})
        print "main: ... done"
def main():

    # Load parameters from command line.
    agg_fns = FLAGS_agg_fns.split(',')
    parser_fn = FLAGS_parser_fn
    embeddings_fn = FLAGS_embeddings_fn
    parser_outfile = FLAGS_parser_outfile
    parser_base_pairs_fn = FLAGS_parser_base_pairs_fn
    only_use_base_pairs = True if FLAGS_only_use_base_pairs == 1 else False
    kb_static_facts_fn = FLAGS_kb_static_facts_fn
    kb_perception_feature_dir = FLAGS_kb_perception_feature_dir
    kb_perception_source_base_dir = FLAGS_kb_perception_source_base_dir
    kb_perception_source_target_dir = FLAGS_kb_perception_source_target_dir
    active_test_set = [int(oidx) for oidx in FLAGS_active_test_set.split(',')]
    only_bare_nouns = True if FLAGS_only_bare_nouns == 1 else False
    training_log_fn = FLAGS_training_log_fn
    full_pairs_log_fn = FLAGS_full_pairs_log_fn
    epochs = FLAGS_epochs
    use_condor = FLAGS_use_condor
    condor_target_dir = FLAGS_condor_target_dir
    condor_parser_script_dir = FLAGS_condor_parser_script_dir
    condor_grounder_script_dir = FLAGS_condor_grounder_script_dir
    assert not use_condor or (condor_target_dir is not None
                              and condor_parser_script_dir is not None
                              and condor_grounder_script_dir is not None)

    # Load the aggregate information from file
    print "main: loading aggregate conversation files..."
    agg_all_utterances = []
    agg_role_utterances_role_chosen_pairs = []
    agg_perceptual_labels = []
    agg_perceptual_synonymy = []
    for agg_fn in agg_fns:
        print "main: ... loading from '" + agg_fn + "'"
        with open(agg_fn, 'rb') as f:
            _agg_all_utterances, _agg_role_utterances_role_chosen_pairs, _agg_perceptual_labels,\
                _agg_perceptual_synonymy = pickle.load(f)
            agg_all_utterances.extend(_agg_all_utterances)
            agg_role_utterances_role_chosen_pairs.extend(
                _agg_role_utterances_role_chosen_pairs)
            agg_perceptual_labels.extend(_agg_perceptual_labels)
            agg_perceptual_synonymy.extend(_agg_perceptual_synonymy)
    print "... done"

    # Load a grounder from file
    print "main: loading base parser from file..."
    with open(parser_fn, 'rb') as f:
        p = pickle.load(f)
        p.lexicon.wv = None
        if embeddings_fn is not None:
            print "main: ... adding embeddings"
            p.lexicon.wv = p.lexicon.load_word_embeddings(embeddings_fn)
    print "main: ... done"

    # Load parser base pairs, if any.
    print "main: loading base parser pairs from file..."
    if parser_base_pairs_fn is not None:
        parser_base_pairs = p.read_in_paired_utterance_semantics(
            parser_base_pairs_fn)
    else:
        parser_base_pairs = []
    print "main: ... done"

    # Copy the base grounder labels.pickle and predicates.pickle into the target directory.
    print "main: copying base KB perception labels and pickles to target dir..."
    base_labels_fn = os.path.join(kb_perception_source_base_dir,
                                  "labels.pickle")
    base_pickles_fn = os.path.join(kb_perception_source_base_dir,
                                   "predicates.pickle")
    if os.path.isfile(base_labels_fn):
        os.system(
            "cp " + base_labels_fn + " " +
            os.path.join(kb_perception_source_target_dir, "labels.pickle"))
    else:
        print "ERROR: file not found '" + base_labels_fn + "'"
        return 1
    if os.path.isfile(base_pickles_fn):
        os.system(
            "cp " + base_pickles_fn + " " +
            os.path.join(kb_perception_source_target_dir, "predicates.pickle"))
    else:
        print "ERROR: file not found '" + base_pickles_fn + "'"
        return 1
    print "main: ... done"

    # Instantiate a new grounder with the base parser and with perception source at the target dir.
    print "main: instantiating grounder..."
    g = KBGrounder.KBGrounder(p, kb_static_facts_fn,
                              kb_perception_source_target_dir,
                              kb_perception_feature_dir, active_test_set)
    print "main: ... done"

    # Instantiate vestigial input/output
    print "main: instantiating basic IO..."
    io = IO.KeyboardIO()
    print "main: ... done"

    # Instantiate an Agent.
    print "main: instantiating Agent..."
    a = Agent.Agent(p, g, io, None)
    print "main: ... done"

    # Open logfile.
    log_f = open(training_log_fn, 'w')

    # Look through aggregated labels to identify good perceptual candidates.
    preds_by_label = {}
    for pred, oidx, l in agg_perceptual_labels:
        if pred not in preds_by_label:
            preds_by_label[pred] = {}
        if oidx not in preds_by_label[pred]:
            preds_by_label[pred][oidx] = 0
        preds_by_label[pred][oidx] += 1 if l else -1
    # print "main: preds_by_label: " + str(preds_by_label)
    preds_by_oidx_label = {}
    for pred in preds_by_label:
        preds_by_oidx_label[pred] = {True: [], False: []}
        for oidx in preds_by_label[pred]:
            if preds_by_label[pred][oidx] > 0:
                preds_by_oidx_label[pred][True].append(oidx)
            elif preds_by_label[pred][oidx] < 0:
                preds_by_oidx_label[pred][False].append(oidx)
    # print "main: preds_by_oidx_label: " + str(preds_by_oidx_label)
    preds_w_pos = [
        pred for pred in preds_by_oidx_label
        if len(preds_by_oidx_label[pred][True]) > 0
    ]
    print "main: preds_w_pos: " + str(preds_w_pos)

    # Analyze synonymy votes and decide which pairs to treat as synonymous.
    synonymy_votes = {}  # maps tuples of preds to the sum of votes for and against their being synonymous
    for predi, predj, v in agg_perceptual_synonymy:
        if (predi, predj) in synonymy_votes:
            key = (predi, predj)
        elif (predj, predi) in synonymy_votes:
            key = (predj, predi)
        else:
            key = (predi, predj)
            synonymy_votes[key] = 0
        synonymy_votes[key] += 1 if v else -1
    print "main: synonymy votes: " + str(synonymy_votes)
    synonymy_candidates = {
        key: synonymy_votes[key]
        for key in synonymy_votes.keys() if synonymy_votes[key] > 0
    }
    print "main: synonymy candidates: " + str(synonymy_candidates)

    # Decide based on synonymy and pred labels which lexicon entries to add (similar to procedure in Agent.py,
    # but based on voting instead of single-user feedback.)
    all_preds = list(
        set([pred for pred, _, _ in agg_perceptual_labels] +
            [pred for pred, _ in synonymy_votes.keys()] +
            [pred for _, pred in synonymy_votes.keys()]))
    preds = [
        pred for pred in all_preds
        if pred not in a.parser.lexicon.surface_forms and
        (pred in preds_w_pos or len([
            synp
            for synp in preds_w_pos if (pred, synp) in synonymy_candidates or
            (synp, pred) in synonymy_candidates
        ])) > 0
    ]
    print "main: preds to consider: " + str(preds)
    utterances_with_pred = {}
    for pred in all_preds:
        utterances_with_pred[pred] = []
        for u in agg_all_utterances:
            if pred in a.parser.tokenize(u):
                utterances_with_pred[pred].append(u)
    # print "main: utterances with preds: " + str(utterances_with_pred)

    # Iterate over predicates to identify likely adjectives (those appearing to the left of other already-known predicates).
    # This process repeats until no further adjectives are found (allowing chains of previously unseen adjectives).
    # Afterwards, any predicate not flagged as an adjective is probably a noun (no perceptual neighbors to the right).
    pred_is_perc = {pred: False for pred in preds}
    new_perceptual_adds = True
    known_perc_preds = [
        tk for tk in a.parser.lexicon.surface_forms
        if a.is_token_perceptual(tk)
    ]
    while new_perceptual_adds:
        new_perceptual_adds = False
        print "main: checking for new adjectives and nouns..."
        for pred in preds:
            if not pred_is_perc[pred] and len(utterances_with_pred[pred]) > 0:
                syn = get_syn_from_candidates(a, pred, synonymy_candidates)

                if only_bare_nouns:

                    # Add bare nouns, later type-raise.
                    a.add_new_perceptual_lexical_entries(pred, False, syn)
                    print "main: added noun for '" + pred + "'"
                    if syn is not None:
                        print "main: ... with known synonym '" + a.parser.lexicon.surface_forms[
                            syn[0]] + "'"
                    log_f.write("added noun entry for '" + pred +
                                "' with synonym " +
                                str(a.parser.lexicon.surface_forms[syn[0]]
                                    if syn is not None else None) + "\n")

                else:

                    # Turkers tend to use malformed language, so add all new preds as both adjectives and nouns.
                    if True:
                        pred_is_perc[pred] = True
                        new_perceptual_adds = True
                        ont_pred = a.add_new_perceptual_lexical_entries(
                            pred, True, syn)
                        a.add_new_perceptual_lexical_entries(
                            pred, False, syn, ont_pred)
                        print "main: added noun and adjective for '" + pred + "'"
                        if syn is not None:
                            print "main: ... with known synonym '" + a.parser.lexicon.surface_forms[
                                syn[0]] + "'"
                        log_f.write("added adjective and noun entry for '" +
                                    pred + "' with synonym " +
                                    str(a.parser.lexicon.surface_forms[syn[0]]
                                        if syn is not None else None) + "\n")

                    # Determine whether each predicate is mostly behaving like a noun or adjective before adding
                    # (currently unreachable because the 'if True' branch above always runs).
                    else:
                        # Just count how often a pred is 'acting' like an adjective or noun based on position.
                        la = ln = 0
                        for u in utterances_with_pred[pred]:
                            tks = a.parser.tokenize(u)
                            tkidx = tks.index(pred)
                            if tkidx < len(tks) - 1 and (
                                    tks[tkidx + 1] in known_perc_preds
                                    or tks[tkidx + 1] in all_preds
                                    or tks[tkidx + 1]
                                    not in a.parser.lexicon.surface_forms):
                                la += 1
                            elif tkidx == len(tks) - 1 or tks[
                                    tkidx +
                                    1] in a.parser.lexicon.surface_forms:
                                ln += 1
                        la /= float(len(utterances_with_pred[pred]))
                        ln /= float(len(utterances_with_pred[pred]))

                        if la > 0.5:
                            pred_is_perc[pred] = True
                            new_perceptual_adds = True
                            a.add_new_perceptual_lexical_entries(
                                pred, True, syn)

                            print "main: added adjective '" + pred + "'"
                            if syn is not None:
                                print "main: ... with known synonym '" + a.parser.lexicon.surface_forms[
                                    syn[0]] + "'"
                            log_f.write("added adjective '" + pred +
                                        "' with synonym " + str(syn) + "\n")

                        elif ln > 0.5:
                            pred_is_perc[pred] = True
                            new_perceptual_adds = True
                            a.add_new_perceptual_lexical_entries(
                                pred, False, syn)

                            print "main: added noun '" + pred + "'"
                            if syn is not None:
                                print "main: ... with known synonym '" + a.parser.lexicon.surface_forms[
                                    syn[0]] + "'"
                            log_f.write("added noun '" + pred +
                                        "' with synonym " + str(syn) + "\n")
    if only_bare_nouns:
        a.parser.type_raise_bare_nouns()  # should only affect new nouns
        a.parser.theta.update_probabilities()  # because the above adds new entries
    print "main: ... done"

    # Retrain perceptual classifiers from aggregated labels.
    upidxs = []
    uoidxs = []
    ulabels = []
    for pred, oidx, label in agg_perceptual_labels:
        if pred in a.grounder.kb.pc.predicates:
            pidx = a.grounder.kb.pc.predicates.index(pred)
            upidxs.append(pidx)
            uoidxs.append(oidx)
            ulabels.append(1 if label else 0)
    print("main: updating predicate classifiers with " + str(len(upidxs)) +
          " new labels across " + str(len(set(upidxs))) + " predicates...")
    a.grounder.kb.pc.update_classifiers([], upidxs, uoidxs, ulabels)
    log_f.write("updated classifiers with " + str(len(upidxs)) +
                " new labels across " + str(len(set(upidxs))) +
                " predicates...\n")
    print "main: ... done"

    # Write new classifiers to file.
    print "main: committing grouder classifiers to file..."
    g.kb.pc.commit_changes()  # save classifiers to disk
    print "main: ... done"

    # Induce pairs from agg data.
    print "main: ... creating induced pairs from aggregated conversations..."
    for action_confirmed, user_utterances_by_role in agg_role_utterances_role_chosen_pairs:
        new_i_pairs = a.induce_utterance_grounding_pairs_from_conversation(
            user_utterances_by_role, action_confirmed)
        a.induced_utterance_grounding_pairs.extend(new_i_pairs)
    print "main: ...... done; induced " + str(
        len(a.induced_utterance_grounding_pairs)) + " pairs"
    log_f.write("induced " + str(len(a.induced_utterance_grounding_pairs)) +
                " utterance/grounding pairs\n")

    # DEBUG - write the Agent out to file for use by other scripts
    with open("agent.temp.pickle", 'wb') as f:
        pickle.dump(a, f)
    # END DEBUG

    # Alternate between inducing new pairs with the most up-to-date parser and training the parser for a single epoch.
    # Each of these stages can be distributed over the UT Condor system to keep wall-clock time closer to linear.
    print "main: training parser by alternating grounding->semantics and semantics->parser training steps..."
    fplfn = open(full_pairs_log_fn, 'w')
    for epoch in range(epochs):

        # Get grounding->semantics pairs
        if not only_use_base_pairs:
            print "main: ... getting utterance/semantic form pairs from induced utterance/grounding pairs..."
            utterance_semantic_grounding_triples = a.get_semantic_forms_for_induced_pairs(
                1,
                10,
                verbose=1,
                use_condor=use_condor,
                condor_target_dir=condor_target_dir,
                condor_script_dir=condor_grounder_script_dir)
            print("main: ...... got " +
                  str(len(utterance_semantic_grounding_triples)) +
                  " utterance/semantics " +
                  "pairs from induced utterance/grounding pairs")
            log_f.write("epoch " + str(epoch) + ": got " +
                        str(len(utterance_semantic_grounding_triples)) +
                        " utterance/semantic pairs\n")

            # Write out induced pairs to logfile(s) for later inspection and qualitative analysis.
            fplfn.write("epoch " + str(epoch) + ":\n\n" + '\n\n'.join([
                '\n'.join([
                    x,
                    a.parser.print_parse(y, True),
                    a.parser.print_parse(z, False)
                ]) for x, y, z in utterance_semantic_grounding_triples
            ]) + '\n\n')
        else:
            utterance_semantic_grounding_triples = []

        # Write the new parser to file.
        print "main: writing current re-trained parser to file..."
        with open(parser_outfile + "." + str(epoch), 'wb') as f:
            pickle.dump(p, f)
        print "main: ... done"

        # Train parser on utterances->semantics pairs
        print "main: ... re-training parser on pairs induced from aggregated conversations..."
        utterance_semantic_pairs = [
            [x, y] for x, y, _ in utterance_semantic_grounding_triples
        ]
        perf = []
        a.parser.train_learner_on_semantic_forms(
            parser_base_pairs + utterance_semantic_pairs,
            epochs=1,
            epoch_offset=epoch,
            reranker_beam=1,
            verbose=2,
            use_condor=use_condor,
            condor_target_dir=condor_target_dir,
            condor_script_dir=condor_parser_script_dir,
            perf_log=perf)
        log_f.write(
            "epoch " + str(epoch) + ": parser trained on " + str(perf[0][0]) +
            " examples and " + "failed on " + str(perf[0][1]) + " out of " +
            str(len(parser_base_pairs) + len(utterance_semantic_pairs)) + "\n")

    # Write the final parser to file.
    print "main: writing current re-trained parser to file..."
    with open(parser_outfile + ".final", 'wb') as f:
        pickle.dump(p, f)
    print "main: ... done"

    fplfn.close()
    print "main: ... done"

    # Close logfile.
    log_f.close()