def __init__(self, csv_file, train_data=None, flag='train'):
    """Load ``csv_file`` and set up the vocabulary attributes.

    Args:
        csv_file: path of the CSV file. It is stored on the instance,
            checked with ``checkExistence`` and then read through
            ``self._load_data()``.
        train_data: ``DataSetCSV`` instance whose mapping attributes are
            copied onto this instance; only used when ``flag == 'test'``.
        flag: ``'train'`` calls ``self._get_params()``; ``'test'`` copies
            the ``*2id`` / ``id2*`` / ``*_vocab_size`` attributes from
            ``train_data``.

    Raises:
        AssertionError: ``flag == 'test'`` and ``train_data`` is not a
            ``DataSetCSV`` instance.
        Exception: ``flag`` is neither ``'train'`` nor ``'test'``.
    """
    self.csv_file = csv_file
    checkExistence(self.csv_file)
    self._load_data()

    if flag == 'train':
        self._get_params()
        return
    if flag != 'test':
        raise Exception('Unknown flag found: {}'.format(flag))

    assert isinstance(train_data, DataSetCSV), 'train_data is not an instance of DataSetCSV'

    # Every vocabulary exposes the same three attributes; copy them all
    # from the training set so both sets share identical id mappings.
    vocab_names = (
        'userIntent',
        'userTag',
        'agentAct',
        'word',
        'userTagIntent',
        'userTagIntentAgentPrevAct',
    )
    attr_patterns = ('{}2id', 'id2{}', '{}_vocab_size')
    for vocab in vocab_names:
        for pattern in attr_patterns:
            attr = pattern.format(vocab)
            setattr(self, attr, getattr(train_data, attr))
def readTagPredTxt(tag_pred_txt, userTag2id, sample_nb, userTag_vocab_size):
    """Turn a file of predicted tags into a multi-hot indicator matrix.

    Line ``i`` of ``tag_pred_txt`` holds the whitespace-separated tags of
    sample ``i``. Each tag is prefixed with ``'tag-'`` and looked up in
    ``userTag2id``; a known tag sets column ``id - 1`` of row ``i`` to 1,
    an unknown tag sets column 0.

    Args:
        tag_pred_txt: path of the prediction file (must exist).
        userTag2id: mapping from ``'tag-<name>'`` to an integer id.
        sample_nb: number of rows of the returned matrix.
        userTag_vocab_size: number of columns of the returned matrix.

    Returns:
        ``numpy`` array of shape ``(sample_nb, userTag_vocab_size)`` filled
        with 0. and 1.
    """
    checkExistence(tag_pred_txt)
    indicator = np.zeros((sample_nb, userTag_vocab_size))
    # Text mode on purpose: in binary mode Python 3 yields bytes, so
    # 'tag-{}'.format(tag) would become "tag-b'...'" and never match a key
    # of userTag2id, silently sending every tag to column 0.
    with open(tag_pred_txt, 'r') as f:
        for idx, line in enumerate(f):
            for tag in line.strip().split():
                key = 'tag-{}'.format(tag)
                # ids are shifted down by one; unknown tags fall back to column 0
                pos = userTag2id[key] - 1 if key in userTag2id else 0
                indicator[idx, pos] = 1.
    return indicator
def readIntentPredTxt(intent_pred_txt, userIntent2id, sample_nb, userIntent_vocab_size):
    """Turn a file of predicted intents into a multi-hot indicator matrix.

    Line ``i`` of ``intent_pred_txt`` holds the ``';'``-separated intents of
    sample ``i``; the literal ``'null'`` is skipped. Each remaining intent
    is prefixed with ``'intent-'`` and looked up in ``userIntent2id``; a
    known intent sets column ``id - 1`` of row ``i`` to 1, an unknown
    intent sets column 0.

    Args:
        intent_pred_txt: path of the prediction file (must exist).
        userIntent2id: mapping from ``'intent-<name>'`` to an integer id.
        sample_nb: number of rows of the returned matrix.
        userIntent_vocab_size: number of columns of the returned matrix.

    Returns:
        ``numpy`` array of shape ``(sample_nb, userIntent_vocab_size)``
        filled with 0. and 1.
    """
    checkExistence(intent_pred_txt)
    indicator = np.zeros((sample_nb, userIntent_vocab_size))
    # Text mode on purpose: in binary mode Python 3 yields bytes, for which
    # split(';') raises TypeError and the comparison with 'null' never holds.
    with open(intent_pred_txt, 'r') as f:
        for idx, line in enumerate(f):
            for intent in line.strip().split(';'):
                if intent == 'null':
                    continue
                key = 'intent-{}'.format(intent)
                # ids are shifted down by one; unknown intents fall back to column 0
                pos = userIntent2id[key] - 1 if key in userIntent2id else 0
                indicator[idx, pos] = 1.
    return indicator
def load_model(self):
    """Restore the saved Keras model and the variables stored next to it.

    Reads the architecture from ``<model_folder>/graph-arch.yaml``, the
    weights from ``self.weights_fname`` and the saved variables from
    ``<model_folder>/other_vars.npz``.

    Sets ``self.model``, ``self.maxlen_userUtter``, ``self.word_vocab_size``,
    ``self.userTag_vocab_size``, ``self.userIntent_vocab_size``,
    ``self.id2userTag``, ``self.id2word``, ``self.id2userIntent`` and
    ``self.userTag2id``.

    Raises:
        AssertionError: ``self.model_folder`` does not exist or
            ``self.weights_fname`` is ``None``.
    """
    print('Loading model ...')
    # check existence of params
    assert os.path.exists(self.model_folder), 'model_folder is not found: {}'.format(self.model_folder)
    assert self.weights_fname is not None, 'Argument required: --weights-file'
    checkExistence(self.weights_fname)
    model_graph = '{}/graph-arch.yaml'.format(self.model_folder)
    model_train_vars = '{}/other_vars.npz'.format(self.model_folder)
    checkExistence(model_graph)
    checkExistence(model_train_vars)

    # NOTE(review): model_from_yaml deserializes YAML and older Keras
    # releases used an unsafe loader - only load trusted model folders.
    from keras.models import model_from_yaml
    with open(model_graph, 'r') as fgraph:
        self.model = model_from_yaml(fgraph.read())
    self.model.load_weights(self.weights_fname)

    # The archive is closed as soon as the values are extracted.
    # allow_pickle=True is needed on NumPy >= 1.16.3 because the mapping
    # entries are read back as Python objects; this unpickles data, so the
    # .npz file must come from a trusted source.
    with np.load(model_train_vars, allow_pickle=True) as npzfile:
        self.maxlen_userUtter = np.int32(npzfile['maxlen_userUtter'][()])
        self.word_vocab_size = np.int32(npzfile['word_vocab_size'][()])
        self.userTag_vocab_size = np.int32(npzfile['userTag_vocab_size'][()])
        self.userIntent_vocab_size = np.int32(npzfile['userIntent_vocab_size'][()])
        self.id2userTag = npzfile['id2userTag'][()]
        self.id2word = npzfile['id2word'][()]
        self.id2userIntent = npzfile['id2userIntent'][()]
        self.userTag2id = npzfile['userTag2id'][()]
def load_model(self):
    """Restore the saved Keras model and the variables stored next to it.

    Reads the architecture from ``<model_folder>/graph-arch.yaml``, the
    weights from ``self.weights_fname`` and the saved variables from
    ``<model_folder>/other_vars.npz``.

    Sets ``self.model``, ``self.agentAct_vocab_size``,
    ``self.userTagIntent_vocab_size``, ``self.id2agentAct`` and
    ``self.window_size``.

    Raises:
        AssertionError: ``self.model_folder`` does not exist, or
            ``self.threshold`` / ``self.weights_fname`` is ``None``.
    """
    print('Loading model ...')
    # check existence of params
    assert os.path.exists(self.model_folder), 'model_folder is not found: {}'.format(self.model_folder)
    assert self.threshold is not None, 'Argument required: --threshold'
    assert self.weights_fname is not None, 'Argument required: --weights-file'
    checkExistence(self.weights_fname)
    model_graph = '{}/graph-arch.yaml'.format(self.model_folder)
    model_train_vars = '{}/other_vars.npz'.format(self.model_folder)
    checkExistence(model_graph)
    checkExistence(model_train_vars)

    # load models
    # NOTE(review): model_from_yaml deserializes YAML and older Keras
    # releases used an unsafe loader - only load trusted model folders.
    from keras.models import model_from_yaml
    with open(model_graph, 'r') as fgraph:
        self.model = model_from_yaml(fgraph.read())
    self.model.load_weights(self.weights_fname)

    # The archive is closed as soon as the values are extracted.
    # allow_pickle=True is needed on NumPy >= 1.16.3 because id2agentAct is
    # read back as a Python object; this unpickles data, so the .npz file
    # must come from a trusted source.
    with np.load(model_train_vars, allow_pickle=True) as npzfile:
        self.agentAct_vocab_size = np.int32(npzfile['agentAct_vocab_size'][()])
        self.userTagIntent_vocab_size = np.int32(npzfile['userTagIntent_vocab_size'][()])
        self.id2agentAct = npzfile['id2agentAct'][()]
        self.window_size = np.int32(npzfile['window_size'][()])
action='store_true', help= 'perform testing for oracle models (CRFtagger, OneVsRest SVMs) and their pipelined model if this option is activated.' ) parser.add_argument('--model-folder', dest='model_folder', help='model folder') args = parser.parse_args() argparams = vars(args) train_only = argparams['train_only'] test_only = argparams['test_only'] assert train_only or test_only, 'Argument required: either --train, --test, or both.' # load train and test data npz_file = argparams['data_npz'] checkExistence(npz_file) data_npz = np.load(npz_file) train_data = data_npz['train_data'][()] dev_data = data_npz['dev_data'][()] test_data = data_npz['test_data'][()] ################################################################################### ##### Training SlotTagging, Intent Prediction, and AgentAct Prediction models ##### ################################################################################### if train_only: if argparams['model_folder'] is None: pid = os.getpid() argparams['model_folder'] = './model/baseline_{}'.format(pid) if not os.path.exists(argparams['model_folder']): os.makedirs(argparams['model_folder'])
args['weights_fname'] = None args['threshold'] = None # argparams = vars(args) argparams = args # print(type(argparams)) # early stop criteria are different for two tasks, therefore one model is # chosen for each. test_tag_only = argparams['test_tag_only'] test_intent_only = argparams['test_intent_only'] train_only = argparams['train_only'] assert train_only or test_tag_only or test_intent_only, 'Arguments required: either --train, --test-tag, or --test-intent' pid = os.getpid() argparams['pid'] = pid npz_fname = argparams['data_npz'] checkExistence(npz_fname) data_npz = np.load(npz_fname) if train_only: # train model argparams['train_data'] = data_npz['train_data'][()] argparams['dev_data'] = data_npz['dev_data'][()] argparams['test_data'] = None model = SlotTaggingModel(**argparams) model.train() else: # train_only is False, while test_only is True # need to load model argparams['train_data'] = None argparams['dev_data'] = None argparams['test_data'] = None if argparams['model_folder'] is None: raise Exception('Argument required: --model-folder')
parser.add_argument('--model-folder', dest='model_folder', help='model folder') args = parser.parse_args() argparams = vars(args) pid = os.getpid() npz_file = argparams['npz_file'] intent_model_weights = argparams['intent_weights'] tag_model_weights = argparams['tag_weights'] act_model_weights = argparams['act_weights'] threshold_intent = argparams['intent_threshold'] tune_threshold = argparams['tune_threshold'] threshold_act = argparams['act_threshold'] # validate params checkExistence(npz_file) checkExistence(intent_model_weights) checkExistence(tag_model_weights) checkExistence(act_model_weights) assert threshold_intent is not None, 'Argument required: --intent-threshold' for key in sorted(argparams.keys()): print('\t{}={}'.format(key, argparams[key])) # load test data data_npz = np.load(npz_file) if tune_threshold: dev_result_folder = './model/pipe_{}/dev'.format(pid) if not os.path.exists(dev_result_folder): os.makedirs(dev_result_folder) print('\tdev_result_folder={}'.format(dev_result_folder))
#                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# parser.add_argument('--iob-file', dest='iob_file',
#                     help='.iob file in DSTC4')
# parser.add_argument('--csv-file', dest='csv_file',
#                     help='the path of converted .csv file in DSTC4')
# parser.add_argument('--root-subdialogs', dest='root_subdialogs',
#                     help='the root directory of DSTC4 subdialogs.')
# args = parser.parse_args()
# iob_file = args.iob_file
# iob_file='data/iob/dstc4.all.w-intent.train.iob'
# checkExistence(iob_file)
#
# csv_file = args.csv_file
# csv_file='data/csv/dstc4.all.w-intent.train.csv'
#
# root_subdialogs = args.root_subdialogs
# root_subdialogs='data/DSTC5/data/'
# checkExistence(root_subdialogs)

# Hard-coded locations for the test split; they take the place of the
# command-line options that are commented out above.
iob_file = 'data/iob/dstc4.all.w-intent.test.iob'
csv_file = 'data/csv/dstc4.all.w-intent.test.csv'
root_subdialogs = 'data/DSTC5/data/'

# Both inputs must exist before any conversion work starts; the CSV file
# is the output and is therefore not checked.
checkExistence(iob_file)
checkExistence(root_subdialogs)

# Build the utterance lookup and dump it for inspection.
utter_search = transformLabelJson_another(root_subdialogs)
for k, v in utter_search.items():
    print(k, '--', v)

# Read the IOB file with that lookup, merge the rows, and write the CSV.
dct_lst = readIOB(iob_file, utter_search)
output_lst, fieldnames = mergeCSV(dct_lst)
writeCSV(output_lst, csv_file, fieldnames)