Пример #1
0
 def __init__(self, name, key_word, max_page, *args, **kwargs):
     """Initialize a WanfangSpider instance.

     Args:
         name: Key passed to ``get_config`` to load this spider's settings.
         key_word: Search keyword, stored as ``self.key_word``.
         max_page: Value stored as ``self.my_max_page``.
         *args: Forwarded unchanged to the parent initializer.
         **kwargs: Forwarded unchanged to the parent initializer.
     """
     # Fixed endpoint used by this spider.
     self.base_url = 'http://s.wanfangdata.com.cn/Paper.aspx?'

     # Per-spider configuration, looked up by name.
     spider_config = get_config(name)
     self.allowed_domains = spider_config.get('allowed_domains')
     self.config = spider_config

     # Search parameters supplied by the caller.
     self.my_max_page = max_page
     self.key_word = key_word

     super(WanfangSpider, self).__init__(*args, **kwargs)
Пример #2
0
 def __init__(self, name, key_word, min_page, max_page, *args, **kwargs):
     """Initialize a CNKISpider instance.

     Args:
         name: Key passed to ``get_config`` to load this spider's settings.
         key_word: Search keyword, stored as ``self.key_word``.
         min_page: Accepted for the caller's benefit but neither stored nor
             read by this initializer.
             NOTE(review): confirm whether it should be kept on the instance.
         max_page: Value stored as ``self.my_max_page``.
         *args: Forwarded unchanged to the parent initializer.
         **kwargs: Forwarded unchanged to the parent initializer.
     """
     # Fixed kns.cnki.net endpoints used by this spider.
     self.base_url = 'http://kns.cnki.net'
     self.home_url = 'http://kns.cnki.net/kns/request/SearchHandler.ashx?action=&NaviCode=*&'
     self.list_url = 'http://kns.cnki.net/kns/brief/brief.aspx'
     self.cur_referer = 'http://kns.cnki.net/kns/brief/default_result.aspx'

     # Per-spider configuration, looked up by name.
     spider_config = get_config(name)
     self.allowed_domains = spider_config.get('allowed_domains')
     self.config = spider_config

     # Search parameters supplied by the caller.
     self.my_max_page = max_page
     self.key_word = key_word

     super(CNKISpider, self).__init__(*args, **kwargs)
Пример #3
0
 def __init__(self, name, key_word, max_page, *args, **kwargs):
     """Initialize a WAPCNKISpider instance and its default search form.

     Args:
         name: Key passed to ``get_config`` to load this spider's settings.
         key_word: Search keyword, stored as ``self.key_word`` and placed in
             the form under ``"keyword"``.
         max_page: Value stored as ``self.my_max_page``.
         *args: Forwarded unchanged to the parent initializer.
         **kwargs: Forwarded unchanged to the parent initializer.
     """
     # Fixed wap.cnki.net endpoint and the Referer header sent with it.
     self.list_url = 'http://wap.cnki.net/touch/web/Article/Search'
     self.header = {'Referer': 'http://wap.cnki.net/touch/web'}

     # Per-spider configuration, looked up by name.
     spider_config = get_config(name)
     self.config = spider_config

     # Search parameters supplied by the caller.
     self.key_word = key_word
     self.my_max_page = max_page

     results_per_page = 10
     self.page_size = results_per_page

     # Form payload template. The original note describes this as "data from
     # the last ten years"; the range itself is fixed by the literal
     # starttime_sc / endtime_sc values below.
     form_data = {
         "searchtype": "0",
         "dbtype": "",
         "pageindex": "1",
         "pagesize": str(results_per_page),
         "theme_kw": "",
         "title_kw": "",
         "full_kw": "",
         "author_kw": "",
         "depart_kw": "",
         "key_kw": "",
         "abstract_kw": "",
         "source_kw": "",
         "teacher_md": "",
         "catalog_md": "",
         "depart_md": "",
         "refer_md": "",
         "name_meet": "",
         "collect_meet": "",
         "keyword": key_word,
         "remark": "",
         "fieldtype": "101",
         "sorttype": "0",
         "articletype": "11",
         "screentype": "0",
         "isscreen": "",
         "subject_sc": "",
         "research_sc": "",
         "depart_sc": "",
         "sponsor_sc": "",
         "author_sc": "",
         "teacher_sc": "",
         "subjectcode_sc": "",
         "researchcode_sc": "",
         "departcode_sc": "",
         "sponsorcode_sc": "",
         "authorcode_sc": "",
         "teachercode_sc": "",
         "starttime_sc": "2007",
         "endtime_sc": "2018",
         "timestate_sc": "1"
     }
     self.myFormData = form_data

     self.allowed_domains = spider_config.get('allowed_domains')
     super(WAPCNKISpider, self).__init__(*args, **kwargs)