def __init__(self):
    """Initialise the spider and create the RDF graph with its prefixes.

    Calls ``Spider.__init__`` with ``thread_number=1``, configures grab
    with ``timeout=120``, stores a new ``rdflib.Graph(store='default')``
    on ``self.repo`` and binds each namespace prefix used by this spider.
    """
    Spider.__init__(self, thread_number=1)
    self.setup_grab(timeout=120)

    self.repo = rdflib.Graph(store='default')

    # (prefix, namespace) pairs; bound in exactly this order.
    prefix_table = (
        ('foaf', FOAF),
        ('swc', SWC),
        ('skos', SKOS),
        ('swrc', SWRC),
        ('dbpedia-owl', DBPEDIAOWL),
        ('bibo', BIBO),
        ('dcterms', DCTERMS),
        ('dc', DC),
        ('timeline', TIMELINE),
    )
    for prefix, namespace in prefix_table:
        self.repo.bind(prefix, namespace)
def __init__(self):
    """Initialise the spider and create the RDF graph with its prefixes.

    Calls ``Spider.__init__`` with ``thread_number=1``, configures grab
    with ``timeout=120``, stores a new ``rdflib.Graph(store='default')``
    on ``self.repo`` and binds each namespace prefix used by this spider.
    """
    Spider.__init__(self, thread_number=1)
    self.setup_grab(timeout=120)

    self.repo = rdflib.Graph(store='default')

    # (prefix, namespace) pairs; bound in exactly this order.
    prefix_table = (
        ('foaf', FOAF),
        ('swc', SWC),
        ('skos', SKOS),
        ('swrc', SWRC),
        ('dbpedia-owl', DBPEDIAOWL),
        ('bibo', BIBO),
        ('dcterms', DCTERMS),
        ('dc', DC),
        ('timeline', TIMELINE),
    )
    for prefix, namespace in prefix_table:
        self.repo.bind(prefix, namespace)
def __init__(self, platform, policy, thread_number=10, proxy_support=False,
             proxy_servers=None, proxy_type='http'):
    """
    Store the proxy settings, then initialise the Fetcher and Spider bases.

    :param platform: Kept on ``self.platform`` and passed to ``Fetcher.__init__``
    :param policy: Passed to ``Fetcher.__init__``
    :param thread_number: The number of threads (passed to ``Spider.__init__``)
    :param proxy_support: Enable/disable proxy support
    :param proxy_servers: List of proxies ['host:port', 'host:port', etc.]
    :param proxy_type: The type of the proxy (http, socks4, socks5)
    """
    self.platform = platform
    self.posts = {}
    self.proxy_servers = proxy_servers
    self.proxy_type = proxy_type

    if not proxy_support:
        # Proxies disabled: get_proxy always yields None and the proxy
        # type given by the caller is cleared.
        self.get_proxy = lambda: None
        self.proxy_type = None
    else:
        # self.proxy_servers is looked up on every call, so reassigning
        # the attribute later changes what get_proxy picks from.
        # NOTE(review): proxy_servers defaults to None; with
        # proxy_support=True and no list given, get_proxy() would call
        # choice(None) -- confirm callers always pass a list.
        self.get_proxy = lambda: choice(self.proxy_servers)

    # Both bases are initialised explicitly, Fetcher first.
    Fetcher.__init__(self, platform, policy)
    Spider.__init__(self, thread_number=thread_number, network_try_limit=5)
def __init__(self, id_list, thread_number):
    """Initialise the spider and remember the ids it was given.

    :param id_list: Stored unchanged on ``self.id_list``
    :param thread_number: Passed to ``Spider.__init__`` as ``thread_number``
    """
    # Base initialisation runs before the instance attribute is set.
    Spider.__init__(
        self,
        thread_number=thread_number,
    )
    self.id_list = id_list