示例#1
0
    def __create_tree(self):
        """Build a small fixture tree together with its expected schema.

        The tree hangs off a fresh ``StandardNode`` sentinel and consists of
        a "root" node with four "issue" children, some of which have
        "article" children in various ``NodeState`` values.

        Returns:
            A ``(sentinel, schema)`` pair where ``sentinel`` is the node the
            tree was attached to and ``schema`` is a nested
            ``(name, state, children)`` tuple describing the "root" subtree.
        """
        sentinel = StandardNode()
        root = sentinel.add_child("root", NodeState.VISITED)

        issue1 = root.add_child("issue1", NodeState.CLOSED)
        issue1.add_child("article1-1", NodeState.CLOSED)
        issue1.add_child("article1-2", NodeState.CLOSED)

        issue2 = root.add_child("issue2", NodeState.VISITED)
        issue2.add_child("article2-1", NodeState.OPEN)
        issue2.add_child("article2-2", NodeState.CLOSED)
        issue2.add_child("article2-3", NodeState.ERROR)

        root.add_child("issue3", NodeState.PROCESSING)

        issue4 = root.add_child("issue4", NodeState.ERROR)
        issue4.add_child("article4-1", NodeState.PROCESSING)

        # NOTE(review): "issue2" is created above as VISITED but is listed
        # here as CLOSED -- confirm whether subtrees_equal() is expected to
        # tolerate this or whether one of the two states is a typo.
        t = ("root", NodeState.VISITED,
             [("issue1", NodeState.CLOSED,
               [("article1-1", NodeState.CLOSED, []),
                ("article1-2", NodeState.CLOSED, [])]),
              ("issue2", NodeState.CLOSED,
               [("article2-1", NodeState.OPEN, []),
                ("article2-2", NodeState.CLOSED, []),
                ("article2-3", NodeState.ERROR, [])]),
              ("issue3", NodeState.PROCESSING, []),
              ("issue4", NodeState.ERROR, [("article4-1", NodeState.PROCESSING,
                                            [])])])
        # Sanity-check the fixture itself before handing it to a test.
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(subtrees_equal(t, root))
        return (sentinel, t)
示例#2
0
 def test_basic_write_and_read(self):
     """Check that a tree survives a write/read round trip.

     The fixture tree is written with ``XMLTreeWriter``, read back with
     ``XMLTreeReader`` into a fresh sentinel, compared against the expected
     schema, and then written again; both serialized strings must match.
     """
     (sentinel, schema_root) = self.__create_tree()

     # First pass: serialize the original tree.
     out = StringIO.StringIO()
     writer = XMLTreeWriter(out)
     writer.write(sentinel)
     out_str = out.getvalue()

     # Read the serialized form back into a brand-new sentinel.
     in_ = StringIO.StringIO(out_str)
     reader = XMLTreeReader(in_)
     new_sentinel = StandardNode()
     reader.read(new_sentinel)
     # assertTrue replaces the deprecated assert_ alias.
     self.assertTrue(
         subtrees_equal(schema_root, new_sentinel.get_child("root")))

     # Second pass: serializing the re-read tree must give identical output.
     out2 = StringIO.StringIO()
     writer2 = XMLTreeWriter(out2)
     writer2.write(new_sentinel)
     out2_str = out2.getvalue()
     self.assertEqual(out_str, out2_str)
示例#3
0
    def run(self):
        """Parse the arguments, run the multithreaded crawler and report.

        Reads the thread count, logging level, log file path and daily
        schedule from the parsed arguments, asks the navigators creator for
        the navigators, runs a ``MultithreadedCrawler`` over a fresh
        ``StandardNode`` sentinel and finally prints a summary of the
        resulting tree.

        The navigators creator's ``on_exit()`` hook is called once the crawl
        is over, including when the crawl raises, so that the hook is not
        skipped on failure.
        """
        args = self.__parse()
        threads_no = args.threads
        logging_level = self.__get_logging_level(args)
        log_file_path = args.log_file
        schedule = self.__get_schedule(args.daily_schedule)

        navigators = self.__navigators_creator.create(args, threads_no)
        try:
            sentinel = StandardNode()
            prog = MultithreadedCrawler(navigators, sentinel, schedule,
                                        log_file_path, args.state_file,
                                        self.__save_period, logging_level)
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and is also valid on Python 3.
            print("Starting activity with {} threads, "
                  "activity daily schedule: {}".format(
                      threads_no, args.daily_schedule))
            prog.run()
            root = sentinel.get_child("root")
        finally:
            # Run the exit hook even if the crawl failed part-way through.
            self.__navigators_creator.on_exit()

        print("Done.\n")
        print(self.__get_tree_summary(root, args.state_file, log_file_path))