Пример #1
0
    def _get_ids(self, seg_info):
        if len(seg_info) < 1:
            return [0], [0]
        bt = seg_info.split('\3')
        if len(self.term_dict) < 1:
            letter_ids = map(int, bt[0].split())[:self._flags.max_seq_len]
            word_ids = map(int, bt[1].split())[:self._flags.max_seq_len]
            return letter_ids, word_ids

        rq = convert_to_unicode("".join(bt))
        bl = [t for t in rq]
        letter_ids = []
        for t in bl:
            letter_ids.append(self.term_dict.get(t.lower(), 1))
            if len(letter_ids) >= self._flags.max_seq_len:
                break

        word_ids = []
        for t in bt:
            t = convert_to_unicode(t)
            word_ids.append(self.term_dict.get(t.lower(), 1))
            if len(word_ids) >= self._flags.max_seq_len:
                break
        return letter_ids, word_ids
Пример #2
0
    def _init_dict(self):
        """
            init dict
        """
        if self.inited_dict:
            return
        if self._flags.platform in ('local-gpu', 'pserver-gpu', 'slurm'):
            gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
            self.place = fluid.CUDAPlace(gpu_id)
        else:
            self.place = fluid.CPUPlace()

        self.term_dict = {}
        if self._flags.qac_dict_path is not None:
            with open(self._flags.qac_dict_path, 'r') as f:
                for line in f:
                    term, term_id = line.strip('\r\n').split('\t')
                    term = convert_to_unicode(term)
                    self.term_dict[term] = int(term_id)

        self.inited_dict = True
        sys.stderr.write("loaded term dict:%s\n" % (len(self.term_dict)))