示例#1
0
 def compute_updating_relation(self, obj):
     """Build relation records linking ``obj`` to similar stored objects.

     ``obj`` is the newly inserted object. Its text is compared against
     every other object in ``self.list_network`` with ``Levenshtein.ratio``;
     each pair whose ratio reaches ``self.ratio`` yields one relation dict
     in which the stored object is the "pre" side and ``obj`` the "post"
     side.

     Args:
         obj: The new object. It must expose the getters used below
             (``get_content``, ``get_title``, ``get_keywords``, ``get_id``,
             ``get_activity``, ``get_service``).

     Returns:
         list: The relation dicts found, in ``self.list_network`` order.
         Empty when nothing is similar enough.
     """
     # The new object's text does not depend on the loop, so compute it
     # once. Fall back to the keywords when there is no content.
     new_text = obj.get_content()
     if new_text is None or new_text == "":
         new_text = str(obj.get_keywords())

     relations = []
     for stored in self.list_network:
         # Never compare the new object with itself.
         if stored is obj:
             continue

         # A missing content or title is treated as empty text so that the
         # concatenation cannot raise TypeError on None.
         # NOTE(review): assumes the getters return str or None - confirm.
         stored_content = stored.get_content()
         stored_title = stored.get_title()
         stored_text = ("" if stored_content is None else stored_content) \
             + ("" if stored_title is None else stored_title)
         if stored_text == "":
             stored_text = str(stored.get_keywords())

         # Similarity of the two texts.
         rate = Levenshtein.ratio(new_text, stored_text)

         # Only pairs at or above the threshold become relations.
         if rate >= self.ratio:
             # Relation between the stored (old) class and the new class.
             relation = cm.get_relation(stored.__class__, obj.__class__)
             record = {
                 "pre_id": stored.get_id(),
                 "relation": relation,
                 "post_id": obj.get_id(),
                 "pre_Class": stored.__class__.__name__,
                 "post_Class": obj.__class__.__name__,
                 "pre_Activity": stored.get_activity(),
                 "post_Activity": obj.get_activity(),
                 "pre_service": stored.get_service(),
                 "pre_title": stored.get_title(),
                 "pre_content": stored.get_content(),
                 "post_service": obj.get_service(),
                 "post_title": obj.get_title(),
                 "post_content": obj.get_content()
             }
             # Relations other than "Update" are echoed for inspection.
             if relation != "Update":
                 print("relation:", record)
             relations.append(record)
     return relations
    def initial_data_status(self):
        """Load the initial data network and link similar objects.

        Reads records from ``self.collection.find()``, turns at most
        ``self.k_no`` of them into objects via ``self.create_class_obj`` and
        appends them to ``self.list_network``. From the second object on,
        each new object is compared with every previously loaded one using
        ``Levenshtein.ratio``; pairs whose ratio reaches ``self.ratio`` are
        appended to ``self.list_network_relation`` as relation dicts.
        Finally the nodes, the relations and the record count are printed.

        Returns:
            None. Results are stored on ``self.list_network`` and
            ``self.list_network_relation``.
        """
        counter = 0  # number of records loaded so far
        # Query all records.
        for data in self.collection.find():
            # Stop once the initial network holds k_no records.
            if counter >= self.k_no:
                break
            obj = self.create_class_obj(data)
            self.list_network.append(obj)
            counter += 1
            print("counter:", counter)

            # Relations need at least two objects in the network.
            if counter < 2:
                continue

            # The new object's text does not depend on the inner loop.
            # Fall back to the keywords when there is no content.
            original_content = obj.get_content()
            if original_content is None or original_content == "":
                original_content = str(obj.get_keywords())

            for iter_obj in self.list_network:
                # Never compare the new object with itself.
                if iter_obj is obj:
                    continue

                # A missing content or title is treated as empty text so
                # the concatenation cannot raise TypeError on None.
                # NOTE(review): assumes the getters return str or None -
                # confirm.
                stored_content = iter_obj.get_content()
                stored_title = iter_obj.get_title()
                iterate_content = \
                    ("" if stored_content is None else stored_content) \
                    + ("" if stored_title is None else stored_title)
                if iterate_content == "":
                    iterate_content = str(iter_obj.get_keywords())

                print("original_content:", original_content)
                print("iter_cont:", iterate_content)
                # Similarity of the two texts.
                rate = Levenshtein.ratio(original_content, iterate_content)
                print("rate:", rate)

                # Only pairs at or above the threshold become relations.
                if rate >= self.ratio:
                    # obj is the newly inserted object, iter_obj the old one.
                    relation = cm.get_relation(iter_obj.__class__,
                                               obj.__class__)
                    # Store the relation in the initial network as a dict.
                    self.list_network_relation.append({
                        "pre_id": iter_obj.get_id(),
                        "relation": relation,
                        "post_id": obj.get_id(),
                        "pre_Class": iter_obj.__class__.__name__,
                        "post_Class": obj.__class__.__name__,
                        "pre_Activity": iter_obj.get_activity(),
                        "post_Activity": obj.get_activity()
                    })

        # Dump the loaded nodes.
        if self.list_network:
            for i, node in enumerate(self.list_network):
                print("i:", i, ", list_network:", node)
        else:
            print("list_network is empty!")
        # Dump the relations found.
        if self.list_network_relation:
            for i, relation in enumerate(self.list_network_relation):
                print("i:", i, ", list_network:",
                      relation)
        else:
            print("list_network_relation is empty!")
        print("counter:", counter)
示例#3
0
    def initial_data_status(self):
        """Load the initial data network, link similar objects and report.

        Reads records from ``self.collection.find().skip(self.kip)``, turns
        at most ``self.k_no`` of them into objects via
        ``self.create_class_obj`` and appends them to ``self.list_network``.
        When ``counter == self.k_no - 1`` (the last record that will be
        loaded) the record is also passed to
        ``self.initialize_linked_status`` and the result is stored in
        ``self.flag``. From the second object on, each new object is
        compared with every previously loaded one using
        ``Levenshtein.ratio``; pairs whose ratio reaches ``self.ratio`` are
        appended to ``self.list_network_relation`` as relation dicts.
        Finally the nodes, relations, ``self.position`` entries and the
        record count are printed.

        Returns:
            None. Results are stored on ``self.list_network``,
            ``self.list_network_relation`` and ``self.flag``.
        """
        counter = 0  # number of records loaded so far
        # Query all records, skipping the first self.kip of them.
        for data in self.collection.find().skip(self.kip):
            # Stop once the initial network holds k_no records.
            if counter >= self.k_no:
                break
            # Set the initial LinkedIn user status from the last record
            # that will be loaded.
            if counter == self.k_no - 1:
                self.flag = self.initialize_linked_status(data)
            print("service:", data.get("服务ID"))
            obj = self.create_class_obj(data)  # build the object
            self.list_network.append(obj)
            counter += 1
            print("counter:", counter)

            # Relations need at least two objects in the network.
            if counter < 2:
                continue

            # The new object's text does not depend on the inner loop.
            # Fall back to the keywords when there is no content.
            original_content = obj.get_content()
            if original_content is None or original_content == "":
                original_content = str(obj.get_keywords())

            for iter_obj in self.list_network:
                # Never compare the new object with itself.
                if iter_obj is obj:
                    continue

                # A missing content or title is treated as empty text so
                # the concatenation cannot raise TypeError on None.
                # NOTE(review): assumes the getters return str or None -
                # confirm.
                stored_content = iter_obj.get_content()
                stored_title = iter_obj.get_title()
                iterate_content = \
                    ("" if stored_content is None else stored_content) \
                    + ("" if stored_title is None else stored_title)
                if iterate_content == "":
                    iterate_content = str(iter_obj.get_keywords())

                # Similarity of the two texts.
                rate = Levenshtein.ratio(original_content, iterate_content)

                # Only pairs at or above the threshold become relations.
                if rate >= self.ratio:
                    # obj is the newly inserted object, iter_obj the old one.
                    relation = cm.get_relation(iter_obj.__class__,
                                               obj.__class__)
                    # Store the relation in the initial network as a dict.
                    self.list_network_relation.append({
                        "pre_id": iter_obj.get_id(),
                        "relation": relation,
                        "post_id": obj.get_id(),
                        "pre_Class": iter_obj.__class__.__name__,
                        "post_Class": obj.__class__.__name__,
                        "pre_Activity": iter_obj.get_activity(),
                        "post_Activity": obj.get_activity(),
                        "pre_service": iter_obj.get_service(),
                        "pre_content": iter_obj.get_content(),
                        "pre_title": iter_obj.get_title(),
                        "post_service": obj.get_service(),
                        "post_content": obj.get_content(),
                        "post_title": obj.get_title()
                    })

        # 1. Nodes
        print("节点:")
        if self.list_network:
            for i, node in enumerate(self.list_network):
                print("i:", i, ", list_network:", node)
        else:
            print("list_network is empty!")
        # 2. Relations
        print("联系")
        if self.list_network_relation:
            for i, relation in enumerate(self.list_network_relation):
                print("i:", i, ", list_network:",
                      relation)
        else:
            print("list_network_relation is empty!")
        # 3. User position data (LinkedIn)
        if self.position:
            for i, position in enumerate(self.position):
                # The index is printed after "i:", matching the node and
                # relation dumps above (it was missing before).
                print("i:", i, ", position:", position)
        else:
            print("list_position is Empty!")
        print("counter:", counter)