def __init__(self, params):
    """Load the vocabulary, knowledge base and tracker dialogs.

    All data is read through the ``dl`` helper module, using the paths and
    sizes carried by ``params``, and stored on the instance.

    Args:
        params: configuration object providing ``batch_size``,
            ``vocab_path``, ``vocab_size``, ``kb_path``, ``max_len`` and,
            for each of the ``trn`` / ``dev`` / ``tst`` splits,
            ``<split>_usr_path``, ``<split>_sys_path`` and
            ``<split>_label_dir``.
    """
    self.batch_size = params.batch_size

    # Vocabulary lookup tables (both directions).
    vocab = dl.read_word2id(params.vocab_path, params.vocab_size)
    self.word2id, self.id2word = vocab

    # Knowledge-base tables.
    kb = dl.read_kb_value(params.kb_path)
    self.names, self.values, self.val2attr, self.entities = kb

    # Dialogs for each split, returned as a (dialog, dialog_vect) pair.
    trn = dl.read_tracker_dialog(params.trn_usr_path, params.trn_sys_path,
                                 self.word2id, params.max_len)
    self.trn_dialog, self.trn_dialog_vect = trn
    dev = dl.read_tracker_dialog(params.dev_usr_path, params.dev_sys_path,
                                 self.word2id, params.max_len)
    self.dev_dialog, self.dev_dialog_vect = dev
    tst = dl.read_tracker_dialog(params.tst_usr_path, params.tst_sys_path,
                                 self.word2id, params.max_len)
    self.tst_dialog, self.tst_dialog_vect = tst

    # Label positions for each split, derived from the dialogs above.
    self.trn_pos = dl.get_tracker_label_pos(self.trn_dialog,
                                            params.trn_label_dir)
    self.dev_pos = dl.get_tracker_label_pos(self.dev_dialog,
                                            params.dev_label_dir)
    self.tst_pos = dl.get_tracker_label_pos(self.tst_dialog,
                                            params.tst_label_dir)

    # NOTE(review): ``trn_label`` / ``dev_label`` / ``tst_label`` are not
    # assigned anywhere in this constructor. Unless they are defined
    # elsewhere on the class, the lookups below raise AttributeError.
    # Confirm whether the ``*_pos`` attributes were meant to be used here.
    self.num_train = len(self.trn_label['cuisine'])
    self.num_dev = len(self.dev_label['cuisine'])
    self.num_test = len(self.tst_label['cuisine'])

    # Single parenthesised argument: prints identically on Python 2 and 3.
    sizes = (self.num_train, self.num_dev, self.num_test)
    print('\tNumber of samples: train: %d, dev: %d, test: %d' % sizes)

    # Starts at zero; presumably the batch read cursor -- TODO confirm.
    self._pointer = 0
def __init__(self, params):
    """Load dialogs and template labels and build flat per-turn inputs.

    All data is read through the ``dl`` helper module. For each of the
    train / dev / test splits the constructor reads the dialogs and their
    template labels, merges and flattens the dialog history, converts it
    to ids, and finally flattens the per-dialog lists into one list.

    Args:
        params: configuration object providing ``batch_size``,
            ``vocab_size``, ``utc_length``, ``turn_num``, ``act_size``,
            ``vocab_path``, ``kb_path``, ``template_path`` and the
            ``train_path`` / ``dev_path`` / ``test_path`` dialog files.
    """
    # Settings copied straight from the configuration object.
    self.batch_size = params.batch_size
    self.vocab_size = params.vocab_size
    self.utc_length = params.utc_length
    self.turn_num = params.turn_num
    self.act_size = params.act_size

    # Fixed list of system action names.
    self.id2act = [
        'you_are_welcome',
        'request_food',
        'api',
        'reservation',
        'hello',
        'inform_address',
        'inform_phone',
        'request_number',
        'on_it',
        'any_help',
        'find_options',
        'update',
        'another_option',
        'recommend',
        'request_area',
        'request_price',
    ]

    vocab = dl.read_word2id(params.vocab_path, params.vocab_size)
    self.word2id, self.id2word = vocab
    kb = dl.read_kb_value(params.kb_path)
    self.names, self.values, self.val2attr, self.entities = kb

    # Raw dialogs: user side, system side and api results per split.
    self.train_usr, self.train_sys, train_api = dl.read_dialog(
        params.train_path)
    self.dev_usr, self.dev_sys, dev_api = dl.read_dialog(params.dev_path)
    self.test_usr, self.test_sys, test_api = dl.read_dialog(
        params.test_path)

    # Template labels, in train / dev / test order.
    self.train_label, self.dev_label, self.test_label = [
        dl.get_template_label(path, params.template_path, params.kb_path)
        for path in (params.train_path, params.dev_path, params.test_path)
    ]

    # Merge the history turns. The number of turns to be merged is
    # decided by params.turn_num.
    speaker_pairs = (
        (self.train_usr, self.train_sys),
        (self.dev_usr, self.dev_sys),
        (self.test_usr, self.test_sys),
    )
    histories = [
        dl.merge_dialog(usr, sys_, params.turn_num)
        for usr, sys_ in speaker_pairs
    ]

    # Flatten all history of a turn into a single string.
    histories = [dl.flatten_history(history) for history in histories]

    # Convert the strings to indexes.
    max_input_len = params.turn_num * params.utc_length
    train_ids, dev_ids, test_ids = [
        dl.convert_2D_str2id(history, self.word2id, self.names,
                             self.val2attr, max_input_len, back=True)
        for history in histories
    ]

    # Get number of restaurants in the api_call result.
    train_api_cnt, dev_api_cnt, test_api_cnt = [
        dl.get_api_number(api, history)
        for api, history in zip((train_api, dev_api, test_api), histories)
    ]

    # Flatten the 2D lists to 1D (merge all dialogs into a single list).
    self.train_input_id = dl.flatten_2D(train_ids)
    self.dev_input_id = dl.flatten_2D(dev_ids)
    self.test_input_id = dl.flatten_2D(test_ids)
    self.train_api_num = dl.flatten_2D(train_api_cnt)
    self.dev_api_num = dl.flatten_2D(dev_api_cnt)
    self.test_api_num = dl.flatten_2D(test_api_cnt)

    self.num_train = len(self.train_input_id)
    self.num_dev = len(self.dev_input_id)
    self.num_test = len(self.test_input_id)

    # Single parenthesised argument: prints identically on Python 2 and 3.
    sizes = (self.num_train, self.num_dev, self.num_test)
    print('\tNumber of turns: train: %d, dev: %d, test: %d' % sizes)

    # Starts at zero; presumably the batch read cursor -- TODO confirm.
    self._pointer = 0
def __init__(self, params):
    """Load dialogs and build flat per-turn input and output id lists.

    All data is read through the ``dl`` helper module. For each of the
    train / dev / test splits the constructor reads the dialogs, merges
    and flattens the dialog history into input strings, converts both
    the inputs and the system utterances to ids, and finally flattens
    the per-dialog lists into one list.

    Args:
        params: configuration object providing ``batch_size``,
            ``vocab_size``, ``utc_length``, ``turn_num``, ``vocab_path``,
            ``kb_path`` and the ``train_path`` / ``dev_path`` /
            ``test_path`` dialog files.
    """
    # Settings copied straight from the configuration object.
    self.batch_size = params.batch_size
    self.vocab_size = params.vocab_size
    self.utc_length = params.utc_length
    self.turn_num = params.turn_num

    vocab = dl.read_word2id(params.vocab_path, params.vocab_size)
    self.word2id, self.id2word = vocab
    kb = dl.read_kb_value(params.kb_path)
    self.names, self.values, self.val2attr, self.entities = kb

    # Raw dialogs: user side, system side and api results per split.
    self.train_usr, self.train_sys, train_api = dl.read_dialog(
        params.train_path)
    self.dev_usr, self.dev_sys, dev_api = dl.read_dialog(params.dev_path)
    self.test_usr, self.test_sys, test_api = dl.read_dialog(
        params.test_path)

    # Merge the history turns. The number of turns to be merged is
    # decided by params.turn_num.
    speaker_pairs = (
        (self.train_usr, self.train_sys),
        (self.dev_usr, self.dev_sys),
        (self.test_usr, self.test_sys),
    )
    histories = [
        dl.merge_dialog(usr, sys_, params.turn_num)
        for usr, sys_ in speaker_pairs
    ]

    # Flatten all history of a turn into a single string.
    histories = [dl.flatten_history(history) for history in histories]

    # Convert the input strings to indexes.
    max_input_len = params.turn_num * params.utc_length
    train_in, dev_in, test_in = [
        dl.convert_2D_str2id(history, self.word2id, self.names,
                             self.val2attr, max_input_len, back=True)
        for history in histories
    ]

    # Convert the system utterances (the outputs) to indexes.
    train_out, dev_out, test_out = [
        dl.convert_2D_str2id(sys_utts, self.word2id, self.names,
                             self.val2attr, params.utc_length,
                             add_headrear=True)
        for sys_utts in (self.train_sys, self.dev_sys, self.test_sys)
    ]

    # Get number of restaurants in the api_call result.
    train_api_cnt, dev_api_cnt, test_api_cnt = [
        dl.get_api_number(api, history)
        for api, history in zip((train_api, dev_api, test_api), histories)
    ]

    # Flatten the 2D lists to 1D (merge all dialogs into a single list).
    self.train_input_id = dl.flatten_2D(train_in)
    self.dev_input_id = dl.flatten_2D(dev_in)
    self.test_input_id = dl.flatten_2D(test_in)
    self.train_output_id = dl.flatten_2D(train_out)
    self.dev_output_id = dl.flatten_2D(dev_out)
    self.test_output_id = dl.flatten_2D(test_out)
    self.train_api_num = dl.flatten_2D(train_api_cnt)
    self.dev_api_num = dl.flatten_2D(dev_api_cnt)
    self.test_api_num = dl.flatten_2D(test_api_cnt)

    self.num_train = len(self.train_input_id)
    self.num_dev = len(self.dev_input_id)
    self.num_test = len(self.test_input_id)

    # Single parenthesised argument: prints identically on Python 2 and 3.
    sizes = (self.num_train, self.num_dev, self.num_test)
    print('\tNumber of turns: train: %d, dev: %d, test: %d' % sizes)

    # Starts at zero; presumably the batch read cursor -- TODO confirm.
    self._pointer = 0