Пример #1
0
    def __init__(self):
        """Run the ``Crawler`` initialiser, then set this instance's fields.

        ``crawl_cookie`` starts as an empty dict; ``status_code`` and
        ``history`` both start as the empty string.
        """
        # The base class is initialised before any attribute is assigned.
        Crawler.__init__(self)

        self.crawl_cookie = {}
        # Assigned left to right: status_code first, then history.
        self.status_code = self.history = ''
Пример #2
0
    def __init__(self):
        """Run the ``Crawler`` initialiser, then set this instance's fields.

        ``crawl_cookie`` starts as an empty dict; ``status_code`` and
        ``history`` both start as the empty string.
        """
        # The base class is initialised before any attribute is assigned.
        Crawler.__init__(self)

        self.crawl_cookie = {}
        # Assigned left to right: status_code first, then history.
        self.status_code = self.history = ''
Пример #3
0
 def __init__(self):
     """Run ``crawler.__init__`` and blank out the text fields.

     ``title``, ``press``, ``date``, ``time`` and ``contents`` all start
     as the empty string.
     """
     crawler.__init__(self)
     # One chained assignment; targets are bound left to right.
     self.title = self.press = self.date = self.time = self.contents = ''
Пример #4
0
    def __init__(self, forced=False):
        """Run the ``Crawler`` initialiser and set up the starting state.

        Args:
            forced: Stored unchanged on ``self.forced``; defaults to False.

        ``results`` starts as an empty set, ``blacklist`` as an empty list,
        and both counters start as ``None``.
        """
        Crawler.__init__(self)
        self.results = set()
        self.forced = forced
        # Both counters begin as None (success first, then failure).
        self.success_count = self.failure_count = None

        self.blacklist = []
        self.name_exceptions = [
            "http://www.cplusplus.com/reference/string/swap/",
        ]
Пример #5
0
    def __init__(self, forced=False):
        """Run the ``Crawler`` initialiser and set up the starting state.

        Args:
            forced: Stored unchanged on ``self.forced``; defaults to False.

        ``results`` starts as an empty set, ``blacklist`` as an empty list,
        and both counters start as ``None``.
        """
        Crawler.__init__(self)
        self.results = set()
        self.forced = forced
        # Both counters begin as None (success first, then failure).
        self.success_count = self.failure_count = None

        self.blacklist = []
        self.name_exceptions = [
            'http://www.cplusplus.com/reference/string/swap/',
        ]
Пример #6
0
 def __init__(self, filename):
     """Run the ``Crawler`` initialiser and prepare the URL containers.

     Args:
         filename: Accepted but not referenced anywhere in this
             constructor.
             NOTE(review): confirm whether ``connectDb`` was meant to
             receive it.
     """
     Crawler.__init__(self)
     # hrefs are kept in a set, i.e. stored by hash value.
     self.viewed_url = set()
     self.candidate_url = []
     # connectDb() is called last, after the containers above exist.
     self.database = self.connectDb()
 def __init__(self):
     """Run ``crawler.__init__`` and start with an empty ``articleList``."""
     crawler.__init__(self)
     self.articleList = []
Пример #8
0
 def __init__(self, filename):
     """Run the ``Crawler`` initialiser and attach a ``CsvDatabase``.

     Args:
         filename: Passed straight to ``CsvDatabase``.

     The database is asked to build the columns ``'url'`` and
     ``'content'`` once it has been stored on ``self.database``.
     """
     Crawler.__init__(self)
     # hrefs are kept in a set, i.e. stored by hash value.
     self.viewed_url = set()
     self.candidate_url = []
     csv_store = CsvDatabase(filename)
     # Store the database before configuring it, as the original does.
     self.database = csv_store
     csv_store.buildColumn(['url', 'content'])
Пример #9
0
 def __init__(self, short_name, long_name, base_url, domain, nested_scrape=False):
     """Run the ``Crawler`` initialiser and record both names.

     Args:
         short_name: Stored unchanged on ``self.short_name``.
         long_name: Stored unchanged on ``self.long_name``.
         base_url: Forwarded positionally to ``Crawler.__init__``.
         domain: Forwarded positionally to ``Crawler.__init__``.
         nested_scrape: Forwarded by keyword to ``Crawler.__init__``;
             defaults to False.
     """
     Crawler.__init__(self, base_url, domain, nested_scrape=nested_scrape)
     self.short_name, self.long_name = short_name, long_name