Пример #1
0
    def test_query_word_pos(self):
        """Report words whose stored POS entries disagree on their first element.

        Loads the ``word.pos`` and ``word.count`` objects, keeps every word whose
        entries produce more than one distinct ``entry[0]`` value, and prints
        those words in descending count order, stopping at the first count
        below 5. Summary figures and the entries of one sample word are printed
        last. Nothing is asserted beyond the placeholder check.
        """
        self.assertTrue(True)

        pos_by_word = read_obj('word.pos')
        count_by_word = read_obj('word.count')

        ambiguous = defaultdict(set)
        for word, entries in pos_by_word.items():
            leading = {entry[0] for entry in entries}
            if len(leading) <= 1:
                continue
            ambiguous[word] |= entries

        ranked = [(word, entries, count_by_word[word])
                  for word, entries in ambiguous.items()]
        ranked.sort(key=lambda row: row[2], reverse=True)

        for word, entries, freq in ranked:
            # The list is sorted by count, so everything after this is rarer.
            if freq < 5:
                break
            print(word, freq, entries)

        n_words = len(count_by_word)
        n_ambiguous = len(ambiguous)
        print('total:', n_words)
        print('multiple:', n_ambiguous)
        print('percent:', 1.0 * n_ambiguous / n_words)

        print(pos_by_word['真'])
Пример #2
0
    def test_frequent_pruned(self):
        """Print the type of every entry in the pruned mobile itemsets object."""
        self.assertTrue(True)

        pruned_path = os.path.join(RESOURCE_DIR, 'mobile.itemsets.pruned.1')
        pruned_itemsets = utils.read_obj(pruned_path)
        for entry in pruned_itemsets:
            print(type(entry))
Пример #3
0
    def test_show_opinion_counter(self):
        """Print each entry of the stored opinion counter with its count.

        Entries are listed in the order returned by ``most_common()``.
        """
        self.assertTrue(True)

        counter_path = os.path.join(RESOURCE_DIR, 'dp', 'opinion.counter')
        opinion_counts = read_obj(counter_path)
        for opinion, count in opinion_counts.most_common():
            print(opinion, count)
Пример #4
0
    def test_show_feature_counter(self):
        """Print each entry of the stored feature counter with its count.

        Entries are listed in the order returned by ``most_common()``.
        """
        self.assertTrue(True)

        counter_path = os.path.join(RESOURCE_DIR, 'dp', 'feature.counter')
        feature_counts = read_obj(counter_path)
        for feature, count in feature_counts.most_common():
            print(feature, count)
Пример #5
0
    def test_extract(self):
        """Run double-propagation extraction on the cached parsed corpus.

        Reads ``dp/dp.R``, flattens the ``'sentences'`` lists of its records,
        calls ``double_propagation.extract`` with a fixed seed opinion set, and
        writes the two results to ``dp/dp.features`` and ``dp/dp.opinions``.
        Nothing is asserted beyond the placeholder check.
        """
        self.assertTrue(True)

        # NOTE: dp.R was produced once by a now-disabled step that concatenated
        # the objects stored under RESOURCE_DIR/parsed3 until at least 50000
        # entries had been collected, then saved the list with save_obj.
        dp_dir = os.path.join(RESOURCE_DIR, 'dp')
        records = read_obj(os.path.join(dp_dir, 'dp.R'))

        # Each record is a pair; only the second item's sentence list is used.
        sentences = []
        for _, parsed in records:
            sentences.extend(parsed['sentences'])

        print('单句总数:', len(sentences))

        seed_opinions = {'不错', '漂亮', '流畅', '方便', '高', '持久'}

        features, expanded_opinions = double_propagation.extract(
            seed_opinions, sentences, parsed=True)
        write_file(os.path.join(dp_dir, 'dp.features'), features)
        write_file(os.path.join(dp_dir, 'dp.opinions'), expanded_opinions)
Пример #6
0
    def test_frequent_support(self):
        """Print every (itemset, support) pair of the mobile itemsets object.

        The total number of pairs is printed after the listing.
        """
        self.assertTrue(True)

        itemsets_path = os.path.join(RESOURCE_DIR, 'mobile.itemsets')
        supported_itemsets = utils.read_obj(itemsets_path)
        for items, support_value in supported_itemsets:
            print(items, support_value)
        print(len(supported_itemsets))
Пример #7
0
    def test_prune_xx(self):
        """Prune the stored feature set in two passes and save the result.

        Loads ``dp.F``, ``dp.O`` and their counters from the ``dp`` resource
        directory, applies ``prune_by_threshold`` followed by
        ``prune_order_features``, prints the feature count before and after each
        pass, and saves the final features as ``dp.F.pruned``.
        """
        self.assertTrue(True)

        dp_dir = os.path.join(RESOURCE_DIR, 'dp')
        features = read_obj(os.path.join(dp_dir, 'dp.F'))
        opinions = read_obj(os.path.join(dp_dir, 'dp.O'))
        feature_counts = read_obj(os.path.join(dp_dir, 'dp.fcounter'))
        opinion_counts = read_obj(os.path.join(dp_dir, 'dp.ocounter'))

        print('len1: ', len(features))

        features, opinions = double_propagation.prune_by_threshold(
            features, opinions, feature_counts, opinion_counts)
        print('len2: ', len(features))

        features = double_propagation.prune_order_features(
            features, feature_counts)
        print('len3: ', len(features))

        save_obj(features, os.path.join(dp_dir, 'dp.F.pruned'))
Пример #8
0
    def test_show_count(self):
        """Print the top relation counts and dump per-relation sample files.

        For each of the ``ff``, ``oo`` and ``fo`` groups, the ``.counter`` and
        ``.dict`` objects are read from ``mobile/count``. The 20 most common
        counter entries of each group are printed under a header, then every
        key of each dict is written to ``samples/<group>_<key>.txt``.
        """
        self.assertTrue(True)

        count_dir = os.path.join(RESOURCE_DIR, 'mobile', 'count')
        groups = ('ff', 'oo', 'fo')

        # Read everything up front: all counters first, then all dicts.
        counters = [utils.read_obj(os.path.join(count_dir, group + '.counter'))
                    for group in groups]
        sample_dicts = [utils.read_obj(os.path.join(count_dir, group + '.dict'))
                        for group in groups]

        rule = '-' * 10
        for group, counter in zip(groups, counters):
            print(rule + group + rule)
            for relation, count in counter.most_common(20):
                print(relation, count)

        for group, samples in zip(groups, sample_dicts):
            for relation in samples:
                target = os.path.join(
                    count_dir, 'samples', '{}_{}.txt'.format(group, relation))
                utils.write_file(target, samples[relation])
Пример #9
0
    def load(cls, model_file, keras_model_file):
        """
        Restore a saved model object and attach its Keras model.

        :param model_file: file in which the Model object was saved
        :param keras_model_file: file in which the Keras model was saved
        :return: the restored object with ``_model`` set to the loaded Keras model
        :rtype: BaseModel
        """
        # The Keras model is loaded first, then the wrapper object is read.
        keras_part = keras.models.load_model(keras_model_file)

        restored = read_obj(model_file)
        restored._model = keras_part
        return restored
Пример #10
0
    def load(cls, keras_model_file=None):
        """
        Restore a saved SBDModel object and attach its Keras model.

        :param keras_model_file: file holding the Keras model; when ``None`` the
            ``_keras_model_file`` attribute of the restored object is used
        :return: the restored object with ``_model`` set to the loaded Keras model
        :rtype: SBDModel
        """
        # A throwaway instance is built only to look up where the object is stored.
        placeholder = SBDModel(None)

        logger.info('loading model...')
        restored = read_obj(placeholder._model_file)

        keras_path = (restored._keras_model_file
                      if keras_model_file is None else keras_model_file)

        logger.info('loading keras model...')
        restored._model = keras.models.load_model(keras_path)
        return restored
Пример #11
0
def get_features():
    """Return the contents of the stored ``dp/dp.F`` object as a list."""
    # NOTE: an earlier, now-disabled variant read dp/feature.counter instead and
    # kept only the entries whose count was greater than 1.
    features_path = os.path.join(RESOURCE_DIR, 'dp', 'dp.F')
    return list(utils.read_obj(features_path))
Пример #12
0
 def load():
     """
     Read the object stored at ``HomoModel.model_file``.

     :return: the object returned by ``read_obj``, expected to be a HomoModel
     :rtype: HomoModel
     """
     stored = read_obj(HomoModel.model_file)
     return stored