def test_find_branches(self):
    # Exercises graph.find_branches() with a tag that is absent from the tree
    # and with a tag (XYZ) that occurs at several places in it.
    #
    # NOTE(review): the nesting of the outline below was reconstructed from the
    # result asserted at the end of this test -- confirm it against the format
    # graph.parse_text_tree() expects.
    DATA = """
        C1
            C11
            XYZ
                C121
                C122
            C13
        XYZ
            C21
                C211
        C3
            XYZ
            C1
    """
    tree = graph.parse_text_tree(DATA)

    def branch_data(tag):
        # data of every node in the branches found for `tag`, in dfs() order
        found = graph.find_branches(tree, tag)
        return [node.data for node, _ in found.dfs()]

    # find non-existent branches
    # the current implementation considers it better to return ['NOT EXIST']
    # than []
    self.assertEqual(branch_data('NOT EXIST'), ['NOT EXIST'])

    # find XYZ and its children: XYZ itself is listed once, followed by the
    # children collected from its occurrences
    self.assertEqual(branch_data('XYZ'), [
        'XYZ',
        'C121',
        'C122',
        'C21',
        'C211',
    ])
def query_by_tag(wlib, tag):
    """
    Build the Position layout for the branches matching `tag` and file the
    web pages of `wlib` under those positions.

    The branches returned by graph.find_branches() are walked with dfs_ctx()
    and one Position is created per node. Each page of wlib.webpages that
    carries at least one tag found among those positions is appended to
    Position.items as a tuple (pos_rel, itags, page):

      pos_rel - one entry per Position in parent_path; 0 when the page also
                carries that Position's tag, 1 otherwise.
      itags   - sorted list of the page's tags that do not appear among the
                positions (irrelevant tags).
      page    - the page itself.

    @param wlib - object providing category.root and webpages.
    @param tag - tag handed to graph.find_branches() to select the branches.
    @return - list of Position with items filled.
    """
    branches = graph.find_branches(wlib.category.root, tag)
    assert branches

    positions = []
    # All tags that appear among the positions. The built-in set replaces the
    # deprecated sets.Set; only add() and membership tests are needed.
    tag_set = set()

    # build positions
    for visit_record in branches.dfs_ctx():
        node, idx, path, _ = visit_record
        pos = Position(node.data)
        positions.append(pos)
        # Keep the Position in the visit record so that records reached later
        # can find it through their `path`.
        visit_record[3] = pos

        # set prefix, set parent.children (the root has no parent, so neither
        # is touched for it)
        if path:
            parent_pos = path[-1][3]
            if len(path) == 1:
                pos.prefix = str(idx + 1)
            else:
                pos.prefix = '%s.%s' % (parent_pos.prefix, str(idx + 1))
            parent_pos.children.append(pos)

        # set parent_path: the Positions stored in `path`, in reverse order
        pos.parent_path = [vr[3] for vr in reversed(path)]

        # update tag_set
        if pos.tag:
            tag_set.add(pos.tag)

    # note: branches is a non null Tree, so positions has at least one entry
    root_pos = positions[0]

    # Positions in reversed bfs() order. A position that receives a page marks
    # every Position in its parent_path as walked, so positions visited later
    # in this order are skipped if they lie on that trail.
    pos_rbfs = [pos for pos, _ in root_pos.bfs()]
    pos_rbfs.reverse()

    for page in wlib.webpages:
        # basic filtering
        rtags = []  # relevant tags
        itags = []  # irrelevant tags
        # `page_tag` rather than `tag`, to avoid shadowing the parameter
        for page_tag in page.tags:
            if page_tag in tag_set:
                rtags.append(page_tag)
            else:
                itags.append(page_tag)
        if not rtags:
            continue
        itags.sort()

        # clear markers first
        for pos in positions:
            pos.trail_walked = False

        for pos in pos_rbfs:
            if pos.trail_walked:
                continue
            if pos.tag not in rtags:
                continue
            # should insert item in this position
            # calculate pos_rel with respect to this position
            pos_rel = []
            for ppos in pos.parent_path:
                ppos.trail_walked = True
                if ppos.tag in rtags:
                    pos_rel.append(0)
                else:
                    pos_rel.append(1)
            pos.items.append((pos_rel, itags, page))
            # TODO: between pos_rel and itags is irrelevant tag in tree.

    # Items are ordered by plain tuple comparison: pos_rel first, then itags;
    # when both are equal the comparison falls through to the page objects.
    for pos in positions:
        pos.items.sort()

    return positions