def __init__(self):
    # call the parent constructor
    Crawler.__init__(self)
    self.crawl_cookie = {}
    self.status_code = ''
    self.history = ''

def __init__(self):
    crawler.__init__(self)
    self.title = ''
    self.press = ''
    self.date = ''
    self.time = ''
    self.contents = ''

def __init__(self, forced=False):
    Crawler.__init__(self)
    self.results = set()
    self.forced = forced
    self.success_count = None
    self.failure_count = None
    self.blacklist = []
    self.name_exceptions = ["http://www.cplusplus.com/reference/string/swap/"]

def __init__(self, filename):
    Crawler.__init__(self)
    self.viewed_url = set()  # visited hrefs, stored by hash value
    self.candidate_url = list()
    self.database = self.connectDb()

def __init__(self):
    crawler.__init__(self)
    self.articleList = []

def __init__(self, filename):
    Crawler.__init__(self)
    self.viewed_url = set()  # visited hrefs, stored by hash value
    self.candidate_url = list()
    self.database = CsvDatabase(filename)
    self.database.buildColumn(['url', 'content'])

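The constructor above relies on a CsvDatabase helper with a buildColumn method, neither of which appears in this listing. The following is a minimal sketch of what such a helper might look like, with the interface inferred entirely from the call site, so all names and behavior here are assumptions:

import csv

class CsvDatabase:
    # Hypothetical sketch of the CsvDatabase helper assumed above; the
    # real class is not shown, so this is inferred from how it is called.
    def __init__(self, filename):
        self.filename = filename
        self.columns = []

    def buildColumn(self, columns):
        # Remember the column names and write them as the CSV header row.
        self.columns = columns
        with open(self.filename, 'w', newline='') as f:
            csv.writer(f).writerow(columns)

    def insert(self, row):
        # Append one record given as a dict; 'insert' is an assumed name.
        with open(self.filename, 'a', newline='') as f:
            csv.writer(f).writerow([row.get(c, '') for c in self.columns])
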
def __init__(self, short_name, long_name, base_url, domain, nested_scrape=False):
    Crawler.__init__(self, base_url, domain, nested_scrape=nested_scrape)
    self.short_name = short_name
    self.long_name = long_name

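Every constructor in this listing delegates to a Crawler (or crawler) parent class that is not shown. A base class along the following lines would satisfy both calling conventions seen above, the zero-argument calls and the final one that forwards base_url, domain, and nested_scrape; it is a sketch under that assumption, not the actual parent, and the fetch helper is hypothetical:

import requests

class Crawler:
    # Minimal sketch of the assumed parent class. The defaults allow both
    # the zero-argument calls (Crawler.__init__(self)) and the call that
    # forwards base_url, domain, and nested_scrape.
    def __init__(self, base_url=None, domain=None, nested_scrape=False):
        self.base_url = base_url
        self.domain = domain
        self.nested_scrape = nested_scrape
        self.session = requests.Session()  # shared HTTP session (assumed)

    def fetch(self, url):
        # Hypothetical convenience wrapper, not present in the original code.
        response = self.session.get(url, timeout=10)
        response.raise_for_status()
        return response.text

With such a base in place, the subclass constructors above would run unchanged; for example, a hypothetical subclass using the last signature could be built as Site('hn', 'Hacker News', 'https://news.ycombinator.com', 'news.ycombinator.com').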