Пример #1
0
 def __init__(self, starting_url=False, save=False, initial_seed=False):
     """Prepare the crawler with its first URL and run flags.

     Args:
         starting_url: Page to begin from. Any falsy value (the default)
             selects ``self.base_url + "/w/Category:Artist"`` instead.
         save: Stored unchanged on ``self.save``.
         initial_seed: Stored unchanged on ``self.initial_seed``.
     """
     Crawler.__init__(self)

     # Nothing here maps to database values; these are plain attributes.
     # With no explicit start page, fall back to the artist category
     # listing built from the instance's base_url.
     self.url = starting_url or (self.base_url + "/w/Category:Artist")
     self.save, self.initial_seed = save, initial_seed

     # NOTE(review): a disabled alternative used to take the URL from the
     # "next" pagination link instead:
     # self.url = self.tree.xpath("//div[@class='listPagination'][1]/a[contains(text(), 'next')]/@href")[0]
Пример #2
0
    def __init__(self, url, save=False, initial_seed=False):
        """Initialise the crawler for the page at ``url``.

        Args:
            url: Stored on ``self.url``.
            save: Stored on ``self.save``. When false, output is printed
                for debugging and testing purposes instead of being saved.
            initial_seed: Stored unchanged on ``self.initial_seed``.
        """
        Crawler.__init__(self)

        # Database values. row_id starts as False and name as an empty
        # string -- presumably filled in later; confirm against the
        # code that saves or parses the page.
        self.row_id, self.name, self.url = False, '', url

        # Run-mode flags (not database values).
        self.save, self.initial_seed = save, initial_seed
Пример #3
0
    def __init__(self, dj_id, dj_name, url, save=False, initial_seed=False):
        """Initialise the crawler for one DJ page and fetch its tree.

        Args:
            dj_id: Stored on ``self.dj_id``.
            dj_name: Stored on ``self.dj_name``; the text before the first
                "(" (whitespace-stripped) becomes ``self.searchable_dj_name``.
            url: Stored on ``self.url`` and passed to ``self.get_tree``.
            save: Stored on ``self.save``. When false, output is printed
                for debugging and testing purposes instead of being saved.
            initial_seed: Stored unchanged on ``self.initial_seed``.
        """
        Crawler.__init__(self)

        # --- Database values ---
        self.row_id = False
        self.dj_id = dj_id
        self.url = url
        self.track_ids = []
        self.multi_dj, self.multi_version = False, True
        self.page_mod_time = False

        # --- Other attributes, including xpath components ---
        self.dj_name = dj_name
        # Keep only the part of the name before any "(" for searching.
        name_prefix, _, _ = dj_name.partition("(")
        self.searchable_dj_name = name_prefix.strip()
        # XPath predicate that excludes nodes whose class contains
        # 'commenttextfield'.
        self.no_comments_selector = "not(contains(@class,'commenttextfield'))"
        # Fetched only after the attributes above are in place, as in the
        # original ordering.
        self.tree = self.get_tree(url)
        self.track_texts = []
        self.save = save
        self.initial_seed = initial_seed