def dump_chats(cls, cursor, scenario_db, json_path, uids=None):
    """Dump chat transcripts to a JSON file.

    Chats for which ``cls.get_chat_example`` returns None, and chats in which
    only one of the two agents produced events, are left out of the dump.

    Args:
        cursor: database cursor used to run the chat queries.
        scenario_db (ScenarioDB): retrieve Scenario by logged uuid.
        json_path (str): output path.
        uids (list): if provided, only log chats from these users.
    """
    if uids is None:
        cursor.execute('SELECT DISTINCT chat_id FROM event')
        chat_ids = [row[0] for row in cursor.fetchall()]
    else:
        # Several of the requested users may have taken part in the same
        # chat; keep only the first occurrence of each chat_id so the chat is
        # not dumped twice (mirrors the DISTINCT used in the branch above).
        chat_ids = []
        seen = set()
        for uid in uids:
            cursor.execute('SELECT chat_id FROM mturk_task WHERE name=?',
                           (uid, ))
            for row in cursor.fetchall():
                if row[0] not in seen:
                    seen.add(row[0])
                    chat_ids.append(row[0])

    def is_single_agent(chat):
        # True when at least one of the two agents logged no event at all.
        agent_event = {0: 0, 1: 0}
        for event in chat.events:
            agent_event[event.agent] += 1
        return agent_event[0] == 0 or agent_event[1] == 0

    examples = []
    for chat_id in chat_ids:
        ex = cls.get_chat_example(cursor, chat_id, scenario_db)
        if ex is None or is_single_agent(ex):
            continue
        examples.append(ex)

    write_json([ex.to_dict() for ex in examples], json_path)
def dump_surveys(cls, cursor, json_path):
    """Write logged survey answers and per-chat agent types to a JSON file.

    The output is a two-element list ``[agent_types, survey_data]``:
    ``agent_types`` maps chat id to the decoded ``agent_types`` column of the
    chat table, and ``survey_data`` maps chat id to ``{0: ..., 1: ...}`` where
    each slot holds the answers given about that partner index.

    Args:
        cursor: database cursor used for the survey and chat queries.
        json_path (str): output path.
    """
    cursor.execute('''SELECT * FROM survey''')
    rows = cursor.fetchall()

    agent_types = {}
    survey_data = {}
    # todo this is pretty lazy - support variable # of questions per task
    # eventually..
    for userid, cid, _, q1, q2, comments in rows:
        answers = {'humanlike': q1, 'cooperative': q2, 'comments': comments}

        cursor.execute(
            '''SELECT agent_types, agent_ids FROM chat WHERE chat_id=?''',
            (cid, ))
        types_json, ids_json = cursor.fetchone()
        agent_types[cid] = json.loads(types_json)
        agent_ids = json.loads(ids_json)

        # The answers describe the *partner*: if the respondent is agent 1,
        # they are filed under index 0, otherwise under index 1.
        if agent_ids['1'] == userid:
            partner_idx = 0
        else:
            partner_idx = 1
        survey_data.setdefault(cid, {0: {}, 1: {}})[partner_idx] = answers

    write_json([agent_types, survey_data], json_path)
def write_metadata(cls, transcripts, outdir, responses=None):
    """Write one metadata row per non-empty chat to ``<outdir>/metadata.json``.

    Each row records the dialogue and scenario ids, the number of items and
    attributes of the scenario, the outcome ('fail' when the reward is 0,
    'success' otherwise) and the labels of both agents. When ``responses`` is
    given, the mean score of every question in ``cls.questions`` is added.

    Args:
        transcripts: iterable of chat dicts.
        outdir (str): directory that receives ``metadata.json``.
        responses: optional mapping from chat uuid to per-agent question scores.
    """
    rows = []
    for chat in transcripts:
        num_events = len(chat['events'])
        if num_events == 0:
            continue

        entry = {
            'dialogue_id': chat['uuid'],
            'scenario_id': chat['scenario_uuid'],
        }
        scenario = cls.get_scenario(chat)
        entry['num_items'] = len(scenario.kbs[0].items)
        entry['num_attrs'] = len(scenario.attributes)

        reward = chat['outcome']['reward']
        entry['outcome'] = 'success' if reward != 0 else 'fail'

        for agent_idx in ('0', '1'):
            entry['agent' + agent_idx] = cls.agent_labels[chat['agents'][agent_idx]]

        if responses:
            # Pool the scores of both agents per question before averaging.
            pooled = defaultdict(list)
            for scores in responses[chat['uuid']].values():
                for question in cls.questions:
                    pooled[question].extend(scores[question])
            for question, collected in pooled.items():
                entry[question] = np.mean(collected)

        rows.append(entry)

    write_json({'data': rows}, os.path.join(outdir, 'metadata.json'))
def log_examples_with_templates(self, examples, log):
    """Replace template ids on events with the templates, then dump to JSON.

    Examples for which ``Preprocessor.skip_example`` is true are left
    untouched, but every example (skipped or not) is still written to ``log``.
    Events are modified in place.

    Args:
        examples: examples whose events carry a ``template`` id (or None).
        log (str): output path.
    """
    for example in examples:
        if not Preprocessor.skip_example(example):
            for event in example.events:
                tid = event.template
                if tid is None:
                    continue
                event.template = self.templates[tid]

    serialized = [ex.to_dict() for ex in examples]
    write_json(serialized, log)
def log_worker_id_to_json(db_path, batch_results):
    """Save the chat-to-worker mapping as ``worker_ids.json``.

    The mapping has the form ``{chat_id: {'0': worker_id, '1': worker_id}}``
    and is written into the directory that contains ``batch_results``.

    Args:
        db_path (str): path to the SQLite database.
        batch_results (str): path to the batch results CSV.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        code_to_wid = read_results_csv(batch_results)
        worker_ids = chat_to_worker_id(cursor, code_to_wid)
    finally:
        # Release the database handle even if reading or mapping fails.
        conn.close()

    # os.path.join keeps the output relative when batch_results has no
    # directory part; plain concatenation would yield '/worker_ids.json'.
    output_dir = os.path.dirname(batch_results)
    write_json(worker_ids, os.path.join(output_dir, 'worker_ids.json'))
def train(self, examples, preprocessor, output_path, stem=False):
    """Compute collocation scores for every role/category pair and save them.

    Args:
        examples: training examples handed to ``get_collocate_data``.
        preprocessor: preprocessor handed to ``get_collocate_data``.
        output_path (str): JSON file that receives the scores.
        stem (bool): forwarded to ``get_collocate_data``.

    Returns:
        defaultdict: ``scores[role][category]`` for each role and category.
    """
    roles = ('seller', 'buyer')
    categories = ('car', 'housing', 'phone', 'electronics', 'furniture',
                  'bike')

    data = self.get_collocate_data(examples, preprocessor, stem=stem)
    scores = defaultdict(dict)
    for role in roles:
        per_category = scores[role]
        for category in categories:
            per_category[category] = self.compute_scores(
                data, role=role, category=category)

    # Serialize as a plain dict; callers still receive the defaultdict.
    write_json(dict(scores), output_path)
    return scores
def dump_reviewed_chat(cls, cursor, json_path):
    """Dump the review table, joined with each chat's reward, to JSON.

    The output maps chat id to ``{'accept': ..., 'success': ..., 'message':
    ...}`` where ``success`` is the ``'reward'`` entry of the chat outcome.

    Args:
        cursor: database cursor used for the review and outcome queries.
        json_path (str): output path.
    """
    cursor.execute('SELECT chat_id, accept, message FROM review')
    reviews = {}
    for chat_id, accepted, note in cursor.fetchall():
        outcome = DatabaseReader.get_chat_outcome(cursor, chat_id)
        reviews[chat_id] = {
            'accept': accepted,
            'success': outcome['reward'],
            'message': note,
        }
    write_json(reviews, json_path)
def dump(self, output):
    """Announce the destination on stdout, then write ``self.data`` as JSON.

    Args:
        output (str): output path.
    """
    notice = 'Dumping data to {}'.format(output)
    print(notice)
    write_json(self.data, output)
print("Loading model from checkpoint ...") encoder = torch.load(args.init_from + args.checkpoint + 'encoder.pt') decoder = torch.load(args.init_from + args.checkpoint + 'decoder.pt') else: print("Creating new model...") encoder = GRU_Encoder(args.word_embed_size, args.num_layers) decoder = Attn_Decoder(args.word_embed_size, args.attn_method, args.dropout) else: # Save config if not os.path.isdir(args.checkpoint): os.makedirs(args.checkpoint) config_path = os.path.join(args.checkpoint, 'config.json') write_json(vars(args), config_path) model_args = args ckpt = None # Load vocab vocab_path = 'data/persona_vocab.pkl' if not os.path.exists(vocab_path): print 'Vocab not found at', vocab_path vocab = None args.ignore_cache = True else: print 'Loading vocab from', vocab_path vocab = read_pickle(vocab_path) # schema = Schema(model_args.schema_path, None) # train_batches = DialogueBatcher(vocab, "train")
parser.add_argument('--schema-path')
parser.add_argument(
    '--scenario-ints-file',
    help='Path to the file containing 6 integers per line that describes the scenario')
parser.add_argument('--output', help='Path to the output JSON scenario file')
args = parser.parse_args()

schema = Schema(args.schema_path)
item_names = ['book', 'hat', 'ball']
scenarios = []

# Consecutive lines are paired into one two-KB scenario. A pair is accepted
# only when both KBs have the same item counts; otherwise the older line is
# dropped and the newer one waits for the next line.
with open(args.scenario_ints_file) as fin:
    pending = []
    for line in fin:
        values = [int(token) for token in line.split()]
        pending.append(KB.from_ints(schema.attributes, item_names, values))
        if len(pending) != 2:
            continue
        first, second = pending
        if first.item_counts != second.item_counts:
            del pending[0]
            continue
        assert first.item_counts == second.item_counts
        scenarios.append(
            Scenario(generate_uuid("FB"), schema.attributes, pending))
        pending = []

scenario_db = ScenarioDB(scenarios)
write_json(scenario_db.to_dict(), args.output)
print 'KB CONTEXT:', result.get('kb_context', None) print 'CANDIDATES:' for c in result['candidates']: if 'response' in c: print '----------' print c['hits'] print to_str(c['response']) #print c['context'] #print c['pos'] # Write a json file of all the candidates if args.test_examples_paths and args.retriever_output: dialogues = preprocessor.preprocess(dataset.test_examples) print 'Retrieving candidates for %s' % ','.join( args.test_examples_paths) start_time = time.time() results = [] for dialogue in dialogues: result = retriever.retrieve_candidates(dialogue, json_dict=True) results.extend(result) if args.verbose: for r in result: dump_result(r) print '[%d s]' % (time.time() - start_time) write_json(results, args.retriever_output) #prev_turns = ["I 'm a poor student . can you go lower ?".split()] #results = retriever.search('seller', 'furniture', '', prev_turns) #for r in results: # print r
help='Transciprts paths', nargs='*', default=[]) parser.add_argument('--train-frac', help='Fraction of training examples', type=float, default=0.6) parser.add_argument('--test-frac', help='Fraction of test examples', type=float, default=0.2) parser.add_argument('--dev-frac', help='Fraction of dev examples', type=float, default=0.2) parser.add_argument('--output-path', help='Output path for splits') args = parser.parse_args() np.random.seed(0) json_data = ([], [], []) for path in args.example_paths: examples = read_json(path) folds = np.random.choice( 3, len(examples), p=[args.train_frac, args.dev_frac, args.test_frac]) for ex, fold in izip(examples, folds): json_data[fold].append(ex) for fold, dataset in izip(('train', 'dev', 'test'), json_data): if len(dataset) > 0: write_json(dataset, '%s%s.json' % (args.output_path, fold))
# Merge the transcripts and surveys of several transcript directories into
# <output>/transcripts/transcripts.json and <output>/transcripts/surveys.json.
import argparse
import os

from cocoa.core.util import read_json, write_json

parser = argparse.ArgumentParser()
# Both options are mandatory: without them the script used to die later with
# an opaque TypeError instead of a usage message.
parser.add_argument('--paths',
                    nargs='+',
                    required=True,
                    help='Paths to transcripts directories')
parser.add_argument('--output', required=True, help='Output directory')
args = parser.parse_args()

all_chats = []
# survey data structure: [{}, {}]
all_surveys = [{}, {}]

for d in args.paths:
    transcript_file = os.path.join(d, 'transcripts/transcripts.json')
    survey_file = os.path.join(d, 'transcripts/surveys.json')

    chats = read_json(transcript_file)
    all_chats.extend(chats)

    # Each survey file holds parallel dicts; merge them slot by slot.
    surveys = read_json(survey_file)
    for i, s in enumerate(surveys):
        all_surveys[i].update(s)

    print("Combined data from {}".format(d))

# Built with os.path.join like every other path in this script.
output_dir = os.path.join(args.output, 'transcripts')
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

write_json(all_chats, os.path.join(output_dir, 'transcripts.json'))
write_json(all_surveys, os.path.join(output_dir, 'surveys.json'))
# Remaining command-line options for the visualization script; the parser
# itself and the earlier options are created above this point.
parser.add_argument(
    '--worker-ids',
    nargs='+',
    help='Path to json file containing chat_id to worker_id mappings')
parser.add_argument('--hist',
                    default=False,
                    action='store_true',
                    help='Plot histgram of ratings')
HTMLVisualizer.add_html_visualizer_arguments(parser)
args = parser.parse_args()

visualizer = Visualizer(args.dialogue_transcripts, args.survey_transcripts)
visualizer.compute_effectiveness()

if args.hist:
    # NOTE(review): `question_scores` is not defined in the visible part of
    # this script - confirm it is assigned earlier, otherwise --hist raises
    # NameError.
    visualizer.hist(question_scores, args.outdir, partner=args.partner)

if args.worker_ids:
    visualizer.worker_stats()

# TODO: move summary and hist to analyzer
if args.summary:
    # The summary is written to the path given by args.stats.
    summary = visualizer.summarize()
    write_json(summary, args.stats)

if args.html_output:
    visualizer.html_visualize(args.viewer_mode,
                              args.html_output,
                              css_file=args.css_file,
                              img_path=args.img_path,
                              worker_ids=visualizer.worker_ids)
def dump_results(self, output):
    """Report how many evaluation results exist, then write them as JSON.

    Args:
        output (str): output path.
    """
    num_results = len(self.eval_results)
    notice = 'Writing {} evaluation results to {}'.format(num_results, output)
    print(notice)
    write_json(self.eval_results, output)
# Extract the scenarios embedded in a chat transcript file and save them as a
# scenario database.
import argparse

from cocoa.core.util import read_json, write_json
from cocoa.core.scenario_db import ScenarioDB
from cocoa.core.schema import Schema
from core.scenario import Scenario

parser = argparse.ArgumentParser()
parser.add_argument('--chats')
parser.add_argument('--scenarios')
parser.add_argument('--max', type=int)
args = parser.parse_args()

chats = read_json(args.chats)

# A missing (or zero) --max means "use every chat".
limit = args.max if args.max else len(chats)
scenarios = [
    Scenario.from_dict(None, chat['scenario']) for chat in chats[:limit]
]

write_json(ScenarioDB(scenarios).to_dict(), args.scenarios)
def dump_db(self):
    """Write ``self.db`` to ``self.db_path`` as JSON and report it on stdout."""
    target = self.db_path
    write_json(self.db, target)
    notice = 'HIT results dumped to {}'.format(target)
    print(notice)