def decode_sent(self, sentinfo, output_fname, config=None):
    """Decode a single sentence/utterance into an n-best list of SLU hypotheses.

    Args:
        sentinfo: utterance info dict consumed by extractFeatures2 and
            self.tuples.activeTuples_sent (project-defined structure).
        output_fname: unused here; kept for interface compatibility with callers.
        config: optional ConfigParser-like object; falls back to self.config.

    Returns:
        N-best list produced by self.tuples.distributionToNbest over the
        per-tuple probability distribution.
    """
    if config is None:
        config = self.config
    # Reset per-utterance feature state before extraction.
    self.X = {}
    self.y = {}
    self.baseXs = []
    self.baseX_pointers = {}
    self.fnames = {}
    log_input_key = "batch"
    if config.has_option("decode", "log_input_key"):
        log_input_key = config.get("decode", "log_input_key")
    self.extractFeatures2(sentinfo, log_input_key=log_input_key)
    decode_results = self.decode()
    # counter tracks how many times each generic tuple has been consumed,
    # so repeated active tuples index successive decode results.
    counter = defaultdict(int)
    active_tuples = self.tuples.activeTuples_sent(sentinfo)
    tuple_distribution = {}
    for this_tuple in active_tuples:
        index = counter[this_tuple]
        # Single-utterance decoding is expected to yield exactly one
        # result per tuple; the bounds check below still guards the
        # index in case a tuple is active more than once.
        assert len(decode_results[this_tuple]) == 1
        if len(decode_results[this_tuple]) - 1 < index:
            p = 0
        else:
            p = decode_results[this_tuple][index]
        tuple_distribution[Tuples.generic_to_specific(this_tuple)] = p
        counter[this_tuple] += 1
    slu_hyps = self.tuples.distributionToNbest(tuple_distribution)
    return slu_hyps
def extractFeatures(self, dw, log_input_key="batch"):
    """Populate self.X / self.y / self.baseXs from a dataset walker.

    Iterates every (log_turn, label_turn) pair in every call of *dw*,
    computing tuple-independent base features once per turn and
    tuple-specific features for each active tuple.

    Args:
        dw: dataset walker exposing ``session_list`` and yielding calls,
            each of which yields (log_turn, label_turn) pairs; label_turn
            is None for unlabelled data.
        log_input_key: key passed through to each feature extractor
            (defaults to "batch").

    Side effects:
        Appends to self.X[tuple], self.y[tuple] (labelled data only),
        self.baseXs, self.baseX_pointers[tuple]; accumulates feature
        keys into self.keys.
    """
    total_calls = len(dw.session_list)
    print(total_calls)
    self.keys = set()
    for call_num, call in enumerate(dw):
        print('[%d/%d]' % (call_num, total_calls))
        for log_turn, label_turn in call:
            if label_turn is not None:
                uacts = label_turn['semantics']['json']
                these_tuples = self.tuples.uactsToTuples(uacts)
                # Warn about tuples outside the known inventory.
                for this_tuple in these_tuples:
                    if this_tuple not in self.tuples.all_tuples:
                        print("Warning: unexpected tuple", this_tuple)
                # Convert generic tuples to their specific forms.
                these_tuples = [
                    Tuples.generic_to_specific(tup) for tup in these_tuples
                ]
            # Tuples considered (active) for this turn.
            active_tuples = self.tuples.activeTuples(log_turn)
            # Base features are independent of the tuple; computed once
            # per turn and shared via baseX_pointers.
            baseX = defaultdict(float)
            for feature_extractor in self.feature_extractors:
                feature_name = feature_extractor.__class__.__name__
                new_feats = feature_extractor.calculate(
                    log_turn, log_input_key=log_input_key)
                for key in new_feats:
                    baseX[(feature_name, key)] += new_feats[key]
                    self.keys.add((feature_name, key))
            self.baseXs.append(baseX)
            for this_tuple in active_tuples:
                # y is only defined for labelled turns; it is likewise
                # only appended below when label_turn is not None.
                if label_turn is not None:
                    y = (Tuples.generic_to_specific(this_tuple)
                         in these_tuples)
                X = defaultdict(float)
                for feature_extractor in self.feature_extractors:
                    feature_name = feature_extractor.__class__.__name__
                    new_feats = feature_extractor.tuple_calculate(
                        this_tuple, log_turn, log_input_key=log_input_key)
                    for key in new_feats:
                        X[(feature_name, key)] += new_feats[key]
                        self.keys.add((feature_name, key))
                self.X.setdefault(this_tuple, []).append(X)
                if label_turn is not None:
                    self.y.setdefault(this_tuple, []).append(y)
                else:
                    # Keep the y slot initialised even for unlabelled data,
                    # matching the original eager initialisation.
                    self.y.setdefault(this_tuple, [])
                # Point this example at the base features of its turn.
                self.baseX_pointers.setdefault(this_tuple, []).append(
                    len(self.baseXs) - 1)