def _get_ids(self, seg_info):
    """Turn a segmented input string into letter-level and word-level id lists.

    ``seg_info`` is split on the ``chr(3)`` separator character. What happens
    next depends on whether ``self.term_dict`` holds any entries:

    * Empty term dict: the first two segments are each parsed as
      whitespace-separated integers and truncated to
      ``self._flags.max_seq_len`` items.
    * Non-empty term dict: letter ids are looked up per character of the
      concatenated segments, and word ids are looked up per segment. Lookups
      use the lower-cased text and fall back to id ``1`` when the key is
      missing. Each list stops growing once it reaches
      ``self._flags.max_seq_len`` entries.

    Args:
        seg_info: The segmented input; segments are separated by ``chr(3)``.

    Returns:
        A ``(letter_ids, word_ids)`` tuple of lists of ints. An empty
        ``seg_info`` yields ``([0], [0])``.

    Raises:
        ValueError: In the empty-term-dict path, if a token is not an integer.
        IndexError: In the empty-term-dict path, if ``seg_info`` contains no
            ``chr(3)`` separator (there is no second segment to read).
    """
    if len(seg_info) < 1:
        return [0], [0]

    segments = seg_info.split('\3')
    max_len = self._flags.max_seq_len

    if len(self.term_dict) < 1:
        # Build real lists before slicing: on Python 3 ``map`` returns an
        # iterator, which cannot be sliced.
        letter_ids = [int(tok) for tok in segments[0].split()][:max_len]
        word_ids = [int(tok) for tok in segments[1].split()][:max_len]
        return letter_ids, word_ids

    text = convert_to_unicode("".join(segments))
    letter_ids = []
    for char in text:
        letter_ids.append(self.term_dict.get(char.lower(), 1))
        # Check after appending, so the first id is always kept.
        if len(letter_ids) >= max_len:
            break

    word_ids = []
    for segment in segments:
        segment = convert_to_unicode(segment)
        word_ids.append(self.term_dict.get(segment.lower(), 1))
        if len(word_ids) >= max_len:
            break

    return letter_ids, word_ids
def _init_dict(self):
    """Select the execution place and load the term dictionary, once.

    Returns immediately when ``self.inited_dict`` is already truthy.
    Otherwise:

    * ``self.place`` becomes a ``fluid.CUDAPlace`` when
      ``self._flags.platform`` is one of the GPU platform names, using the
      index from the ``FLAGS_selected_gpus`` environment variable
      (default ``0``); for any other platform it becomes a
      ``fluid.CPUPlace``.
    * ``self.term_dict`` is reset to an empty dict and, when
      ``self._flags.qac_dict_path`` is not ``None``, filled from that file.
      Each line is expected to hold ``term<TAB>id``.
    * ``self.inited_dict`` is set to ``True`` and the number of loaded terms
      is written to stderr.
    """
    if self.inited_dict:
        return

    gpu_platforms = ('local-gpu', 'pserver-gpu', 'slurm')
    if self._flags.platform not in gpu_platforms:
        self.place = fluid.CPUPlace()
    else:
        device_index = int(os.environ.get('FLAGS_selected_gpus', 0))
        self.place = fluid.CUDAPlace(device_index)

    self.term_dict = {}
    dict_path = self._flags.qac_dict_path
    if dict_path is not None:
        with open(dict_path, 'r') as dict_file:
            for raw_line in dict_file:
                # Exactly two tab-separated fields are required per line.
                fields = raw_line.strip('\r\n').split('\t')
                term, term_id = fields
                key = convert_to_unicode(term)
                self.term_dict[key] = int(term_id)

    self.inited_dict = True
    term_count = len(self.term_dict)
    sys.stderr.write("loaded term dict:%s\n" % term_count)