Exemplo n.º 1
0
 def test_same_logger(self):
     """
     Singleton test: two MSLogger() calls must hand back the very same object.
     """
     first = mslogger.MSLogger()
     second = mslogger.MSLogger()
     # Check identity rather than equality: a custom __eq__ could make two
     # distinct instances compare equal and hide a broken singleton.
     self.assertIs(first, second)
Exemplo n.º 2
0
 def __init__(self, configfile):
     """
     Read the spider settings from ``configfile``.

     The caller should make sure that ``configfile`` exists. When it does
     not, an error is logged and the constructor returns early: the
     underlying parser stays ``None`` and every setting keeps the
     placeholder default assigned below.
     """
     self.__logger = mslogger.MSLogger().getLogger()
     self.__config = None
     # Section name; assigned once, before any getSpiderConfig() call.
     self.__spiderSection = "spider"
     # Placeholder defaults, kept when the config file is missing.
     self.__urlListFile = ""
     self.__outputDirectory = ""
     self.__maxDepth = ""
     self.__crawlInterval = ""
     self.__crawlTimeout = ""
     self.__targetUrl = ""
     self.__threadCount = 0
     if not os.path.exists(configfile):
         msg = "file " + configfile + " not exist!"
         self.__logger.error(msg)
         return
     # A real one-element tuple: (';') without the trailing comma is just
     # the string ';' wrapped in parentheses.
     self.__config = configparser.ConfigParser(
         inline_comment_prefixes=(';',))
     self.__logger.info("configfile:" + configfile)
     # The second positional argument of read() is the file encoding.
     self.__config.read(configfile, "utf-8")
     # Replace the placeholders with whatever getSpiderConfig() returns
     # for each option name.
     self.__urlListFile = self.getSpiderConfig("url_list_file")
     self.__outputDirectory = self.getSpiderConfig("output_directory")
     self.__maxDepth = self.getSpiderConfig("max_depth")
     self.__crawlInterval = self.getSpiderConfig("crawl_interval")
     self.__crawlTimeout = self.getSpiderConfig("crawl_timeout")
     self.__targetUrl = self.getSpiderConfig("target_url")
     self.__threadCount = self.getSpiderConfig("thread_count")
Exemplo n.º 3
0
 def __init__(self, seedfile):
     """Initialise the URL containers, then call _read_seed_file().

     The caller should make sure that the seed file exists.
     """
     # path of the file that lists the seed sites
     self.__seedfile = seedfile
     self.__logger = mslogger.MSLogger().getLogger()
     self.lock = threading.Lock()
     # sites not visited yet; a Queue created without maxsize is unbounded
     self.__unvistedUrls = queue.Queue()
     # sites already visited
     self.__visitedUrls = list()
     # NOTE(review): this runs before self.config is assigned below --
     # confirm _read_seed_file() does not rely on self.config.
     self._read_seed_file()
     self.config = msconfigparser.MSConfigParser(msconfigparser.default_cf)
Exemplo n.º 4
0
 def __init__(self, argv):
     """Parse the command-line arguments in ``argv`` and fetch the logger.

     Recognised options:
       -c / --config   config file path, stored as ``configfile``
                       (default "./spider.conf")
       -v / --version  print the version string
     """
     config_option = {
         "default": "./spider.conf",
         "dest": "configfile",
         "help": "indicate the config file",
     }
     version_option = {
         "action": "version",
         "version": "version 0.0.0",
         "help": "show the version",
     }
     cli = argparse.ArgumentParser()
     cli.add_argument("-c", "--config", **config_option)
     cli.add_argument("-v", "--version", **version_option)
     # Parse first, then create the logger (same order as before).
     self.__args = cli.parse_args(argv)
     self.__logger = mslogger.MSLogger().getLogger()
Exemplo n.º 5
0
 def __init__(self, urlstore):
     """Initialise the Thread base class and keep the collaborators.

     ``urlstore`` is stored unchanged; the logger comes from MSLogger and
     the configuration is built from ``msconfigparser.default_cf``.
     """
     # The base-class initialiser runs before any attribute is set.
     threading.Thread.__init__(self)
     logger = mslogger.MSLogger().getLogger()
     settings = msconfigparser.MSConfigParser(msconfigparser.default_cf)
     self.__urlstore = urlstore
     self.__logger = logger
     self.config = settings
Exemplo n.º 6
0
 def __init__(self, url, depth):
     """Store the URL and its depth, and set up logger, URL list and config.

     ``url`` and ``depth`` are stored unchanged; ``__more_urls`` starts as
     an empty list.
     """
     self.__url = url
     self.__depth = depth
     self.__logger = mslogger.MSLogger().getLogger()
     self.__more_urls = []
     # Use the same default config path as the other MSConfigParser users
     # in this file. An empty path can never name an existing file, so the
     # parser would presumably only log an error and hold no settings.
     self.config = msconfigparser.MSConfigParser(msconfigparser.default_cf)