Пример #1
0
    def cluster(self, data, keys, save_path):
        """Build the cluster tree for ``data`` and save it as JSON.

        Args:
            data: Items to cluster; passed unchanged to ``_basic_cluster``.
            keys: Passed to ``_basic_cluster`` together with ``data``
                (presumably one key per item -- confirm against the helper).
            save_path: Destination handed to ``_file.op_file`` for the dump
                of ``self._cluster_tree``.
        """
        # Must be set before clustering starts. Presumably the index of the
        # last level described by ``self._cluster`` -- TODO confirm.
        self._clust_len = len(self._cluster) - 1

        # The helper's return value is not used, so it is expected to fill
        # ``self._cluster_tree`` in place. The trailing 0 is presumably the
        # starting depth -- verify against ``_basic_cluster``.
        self._basic_cluster(data, keys, self._cluster_tree, 0)

        _file.op_file(file_path=save_path,
                      data=self._cluster_tree,
                      model='json',
                      method='save')
Пример #2
0
    def search_tencent(self,
                       dts,
                       root_path,
                       branchs=2,
                       candidate=1,
                       distance=3):
        """Search the cluster files closest to ``dts`` and rank the results.

        The root index read from ``root_path`` must provide ``center_point``
        (one entry per cluster) and ``festival`` (a mapping from
        ``'file<idx>'`` to the path of that cluster's tree file). The
        clusters whose centres are nearest to ``dts`` are searched with
        ``similirity`` and the merged results are sorted.

        Args:
            dts: Query point; compared with every centre through the
                module-level ``data.point_distance``.
            root_path: Path of the root index file.
            branchs: Either a single number (how many root clusters to
                search; the lower levels then use the default of
                ``similirity``) or a sequence whose first item is that
                number and whose remaining items are forwarded to
                ``similirity``. The argument is never modified.
            candidate: Maximum number of results to return.
            distance: Forwarded to ``similirity``.

        Returns:
            list: At most ``candidate`` results, ascending by their first
            element.

        Raises:
            IndexError: If ``branchs`` is an empty sequence.
        """
        root = _file.op_file(root_path, method='read')
        centers = root['center_point']

        # Work on a copy so the caller's list is left intact; a bare number
        # (such as the default) only sets the width of the root level.
        try:
            levels = list(branchs)
        except TypeError:
            root_width, deeper = branchs, None
        else:
            root_width, deeper = levels[0], levels[1:]

        # Rank centre indices by distance. Using the distance as a dict key
        # would drop centres that are equally far from the query.
        distances = [data.point_distance(dts, center) for center in centers]
        nearest = sorted(range(len(distances)), key=distances.__getitem__)
        sel_point = max(0, min(len(distances), root_width))

        all_res = []
        for idx in nearest[:sel_point]:
            path = root['festival']['file' + str(idx)]
            if deeper is None:
                found = self.similirity(dts,
                                        path,
                                        candidate=candidate,
                                        distance=distance)
            else:
                # A fresh copy per file keeps the searches independent.
                found = self.similirity(dts, path, list(deeper), candidate,
                                        distance)
            all_res.extend(found)

        all_res.sort(key=lambda hit: hit[0])
        return all_res[:candidate]
Пример #3
0
    def take9_file(self, root_path):
        """Cluster the JSON files ``tree3`` to ``tree8`` one after another.

        For every number from 3 to 8, ``data/tencent/tree<N>.json`` is read,
        its values and keys are handed to ``cluster``, and the result is
        written to ``data/tencent/tc_tree<N>.json``. ``self._cluster_tree``
        is reset to an empty root node after each file so the next run
        starts from scratch.

        Args:
            root_path: Currently unused; the paths are fixed under
                ``data/tencent/``.
        """
        for number in range(3, 9):
            source = 'data/tencent/tree' + str(number) + '.json'
            target = 'data/tencent/tc_tree' + str(number) + '.json'

            content = _file.op_file(source, method='read')
            self.cluster(list(content.values()), list(content.keys()), target)
            # Drop the loaded file before the next one is read.
            del content

            # Fresh root node for the next file.
            self._cluster_tree = {
                "position": 'root',
                "festival": [],
                "center_point": None
            }
Пример #4
0
    def tencent(self, data, words, clusters=None):
        """Split ``data`` into k-means clusters and save one file per cluster.

        Each cluster ``i`` is written to ``data/tree<i>.json`` as a mapping
        from ``words[a]`` to ``data[a]`` for every sample ``a`` assigned to
        it. The root index (cluster centres plus the path of every cluster
        file) is written to ``data/root.json``.

        Args:
            data: Samples to cluster; passed to ``KMeans`` and indexed by
                position.
            words: ``words[a]`` is the key under which ``data[a]`` is
                stored.
            clusters: Sequence whose first item is the number of clusters.
                Defaults to ``[5]``. The argument is never modified.

        Raises:
            IndexError: If ``clusters`` is an empty sequence.
        """
        # A fresh default per call: a shared default list that gets consumed
        # would break every later call that relies on the default.
        if clusters is None:
            clusters = [5]
        n_clusters = clusters[0]

        km = KMeans(init="k-means++", n_clusters=n_clusters)
        km.fit_predict(data)

        _kmeans_tree = {
            "position": "root",
            "center_point": [list(center) for center in km.cluster_centers_],
            "festival": {}
        }

        # One empty bucket per cluster centre.
        class_data = {
            'file' + str(number): {}
            for number in range(len(km.cluster_centers_))
        }

        # Split all samples by cluster label and store them as dicts.
        for position, label in enumerate(km.labels_):
            class_data['file' + str(label)][words[position]] = data[position]

        # Save every cluster to its own file.
        for idx in range(n_clusters):
            key = 'file' + str(idx)
            save_path = 'data/tree' + str(idx) + '.json'
            _kmeans_tree['festival'][key] = save_path
            # pop() removes the bucket, so it can be freed once it is saved.
            _file.op_file(file_path=save_path,
                          data=class_data.pop(key),
                          model='json',
                          method='save')

        # Store the root node lookup file.
        _file.op_file(file_path='data/root.json',
                      data=_kmeans_tree,
                      model='json',
                      method='save')
Пример #5
0
    def similirity(self,
                   data,
                   file_path,
                   branchs=None,
                   candidate=3,
                   distance=15):
        """Search one tree file for ``data`` and return the best results.

        The search parameters are stored on the instance, the tree is read
        from ``file_path`` and ``search_tree`` is run on it; whatever ends up
        in ``self._search_result`` is then sorted.

        Args:
            data: Query passed to ``search_tree``.
            file_path: Path of the JSON tree file to search.
            branchs: Stored as ``self._search_branch``. Defaults to a new
                ``[2, 2]`` list on every call.
            candidate: Maximum number of results to return.
            distance: Stored as ``self._max_dist``.

        Returns:
            list: At most ``candidate`` entries of ``self._search_result``,
            ascending by their first element (presumably a distance --
            confirm against ``search_tree``).
        """
        # A fresh list per call: a shared default object stored on the
        # instance would carry any in-place change into later calls.
        if branchs is None:
            branchs = [2, 2]

        self._max_dist = distance
        self._search_branch = branchs
        self._search_result = []

        tree = _file.op_file(file_path, model='json', method='read')
        self.search_tree(data, tree)

        ranked = sorted(self._search_result, key=lambda hit: hit[0])
        return ranked[:candidate]