示例#1
0
 def __init__(self):
     """Initialise the spider and create the RDF graph with its prefixes.

     ``Spider.__init__`` is called with ``thread_number=1`` and
     ``setup_grab`` with ``timeout=120``. A new ``rdflib.Graph`` using the
     ``'default'`` store is then stored on ``self.repo`` and each
     namespace prefix below is bound to it, in the listed order.
     """
     Spider.__init__(self, thread_number=1)
     self.setup_grab(timeout=120)

     graph = rdflib.Graph(store='default')
     self.repo = graph

     # (prefix, namespace) pairs; the order matches the binding order.
     prefix_table = (
         ('foaf', FOAF),
         ('swc', SWC),
         ('skos', SKOS),
         ('swrc', SWRC),
         ('dbpedia-owl', DBPEDIAOWL),
         ('bibo', BIBO),
         ('dcterms', DCTERMS),
         ('dc', DC),
         ('timeline', TIMELINE),
     )
     for prefix, namespace in prefix_table:
         graph.bind(prefix, namespace)
示例#2
0
 def __init__(self):
     """Initialise the spider and create the RDF graph with its prefixes.

     ``Spider.__init__`` is called with ``thread_number=1`` and
     ``setup_grab`` with ``timeout=120``. A new ``rdflib.Graph`` using the
     ``'default'`` store is then stored on ``self.repo`` and each
     namespace prefix below is bound to it, in the listed order.
     """
     Spider.__init__(self, thread_number=1)
     self.setup_grab(timeout=120)

     graph = rdflib.Graph(store='default')
     self.repo = graph

     # (prefix, namespace) pairs; the order matches the binding order.
     prefix_table = (
         ('foaf', FOAF),
         ('swc', SWC),
         ('skos', SKOS),
         ('swrc', SWRC),
         ('dbpedia-owl', DBPEDIAOWL),
         ('bibo', BIBO),
         ('dcterms', DCTERMS),
         ('dc', DC),
         ('timeline', TIMELINE),
     )
     for prefix, namespace in prefix_table:
         graph.bind(prefix, namespace)
示例#3
0
 def __init__(self, platform, policy, thread_number=10,
              proxy_support=False, proxy_servers=None, proxy_type='http'):
     """
     Store the proxy configuration, then initialise both base classes.

     :param platform: Stored on ``self.platform`` and passed to ``Fetcher.__init__``
     :param policy: Passed to ``Fetcher.__init__``
     :param thread_number: The number of threads (forwarded to ``Spider.__init__``)
     :param proxy_support: Enable/disable proxy usage
     :param proxy_servers: List of proxies ['host:port', 'host:port', etc.]
     :param proxy_type: The type of the proxy (http, socks4, socks5);
         ``self.proxy_type`` is ``None`` when ``proxy_support`` is falsy
     """
     def pick_proxy():
         # Reads self.proxy_servers on every call, so a list replaced after
         # construction is picked up.
         return choice(self.proxy_servers)

     def no_proxy():
         return None

     self.platform = platform
     self.proxy_servers = proxy_servers
     self.proxy_type = proxy_type if proxy_support else None
     self.get_proxy = pick_proxy if proxy_support else no_proxy
     self.posts = {}

     Fetcher.__init__(self, platform, policy)
     Spider.__init__(self, thread_number=thread_number, network_try_limit=5)
示例#4
0
 def __init__(self, id_list, thread_number):
     """Initialise the spider and keep the given ids on the instance.

     :param id_list: Stored unchanged on ``self.id_list``; how it is
         consumed is not visible in this method.
     :param thread_number: Forwarded to ``Spider.__init__`` as
         ``thread_number``.
     """
     Spider.__init__(self, thread_number=thread_number)
     self.id_list = id_list