Exemplo n.º 1
0
 def __init__(self):
     """Configure this spider for www.androidperformance.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``; how the base class consumes them is defined there.
     """
     entry_pages = ['https://www.androidperformance.com']
     # href of every <a> that is a direct child of an <article>.
     link_query = "//article/a/@href"
     title_query = "//*[@class='post-title']/a/text()"
     body_query = "//*[@class='article-entry']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 2
0
 def __init__(self):
     """Configure this spider for www.wjdiankong.cn.

     Forwards the start URL, the index/title/content XPath expressions and
     one trim XPath to ``Spider.__init__``.
     """
     entry_pages = ['http://www.wjdiankong.cn']
     link_query = "//header/h2/a/@href"
     title_query = '//header/h1/a/text()'
     body_query = "//div/article"
     # Presumably nodes matching these are stripped from the article body;
     # confirm against Spider's handling of article_trim_xpaths.
     trim_queries = ['//div[@class="article-social"]']
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         article_trim_xpaths=trim_queries,
     )
Exemplo n.º 3
0
 def __init__(self):
     """Configure this spider for the FEX weekly page on fex.baidu.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['http://fex.baidu.com/weekly/']
     # Any <a> nested at any depth inside <ul class="post-list">.
     link_query = "//ul[@class='post-list']//a/@href"
     title_query = "//h1[@class='title']/text()"
     body_query = "//div[@class='content']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 4
0
 def __init__(self):
     """Configure this spider for timyang.net.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://timyang.net']
     link_query = "//h2/a/@href"
     title_query = "//*[@class='posttitle']/a/text()"
     body_query = "//*[@class='entry']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 5
0
 def __init__(self):
     """Configure this spider for www.lfhacks.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://www.lfhacks.com/']
     link_query = "//li[@class='article']/h4/a/@href"
     title_query = "//article[@class='article']/h1/text()"
     # The content node is located by its itemprop attribute, not a class.
     body_query = "//div[@itemprop='articleBody']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 6
0
 def __init__(self):
     """Configure this spider for the blog at www.stackoverflow.wiki.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://www.stackoverflow.wiki/blog']
     link_query = "//h1/a/@href"
     # Substring match on the class attribute, so any class value that
     # contains "title" qualifies.
     title_query = "//h1[contains(@class, 'title')]/text()"
     body_query = "//div[@id='post-article']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 7
0
 def __init__(self):
     """Configure this spider for xingzx.org.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://xingzx.org/']
     link_query = "//h2[@class='post-title']/a/@href"
     # Exact match on the full two-token class attribute value.
     title_query = "//div[@class='markdown-body editormd-preview-container']/h1/text()"
     body_query = "//div[@class='blog-body']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 8
0
 def __init__(self):
     """Configure this spider for the "approval" listing on www.jdon.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://www.jdon.com/approval']
     link_query = "//h3[@class='vid-name']/a/@href"
     # The title is read from the document <title> element, not a heading.
     title_query = "//title/text()"
     body_query = "//div[@class='post_body_content']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 9
0
 def __init__(self):
     """Configure this spider for the polaris1119/golangweekly GitHub repo.

     Forwards the start URL, three XPath expressions and an index limit to
     ``Spider.__init__``.
     """
     entry_pages = ['https://github.com/polaris1119/golangweekly']
     # NOTE(review): the text matched inside contains() looks like it may be
     # mis-encoded; verify the literal against the live page before relying
     # on it. It is reproduced unchanged here.
     link_query = "//article//a[contains(text(),'угг')]/@href"
     title_query = '//article//h1/text()'
     body_query = "//article"
     # Presumably caps how many index links are followed; confirm in Spider.
     max_links = 3
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_limit_count=max_links,
     )
Exemplo n.º 10
0
 def __init__(self):
     """Configure this spider for moelove.info.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://moelove.info/']
     # Index and article pages both use <h1 class="post-title">; the index
     # query takes the nested link, the title query takes the heading text.
     link_query = "//h1[@class='post-title']/a/@href"
     title_query = "//h1[@class='post-title']/text()"
     body_query = "//div[@class='post-content']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 11
0
 def __init__(self):
     """Configure this spider for the archive page of yafeilee.com.

     Forwards the start URL, three XPath expressions and an index limit to
     ``Spider.__init__``.
     """
     entry_pages = ['https://yafeilee.com/archives']
     link_query = "//a[@class='blog-title']/@href"
     title_query = "//h2[@class='blog-title']/text()"
     body_query = "//div[@class='content markdown']"
     # Presumably caps how many index links are followed; confirm in Spider.
     max_links = 1
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_limit_count=max_links,
     )
Exemplo n.º 12
0
 def __init__(self):
     """Configure this spider for www.yinwang.org.

     Forwards the start URL, three XPath expressions and an index limit to
     ``Spider.__init__``.
     """
     entry_pages = ['http://www.yinwang.org']
     link_query = "//ul[@class='list-group']/li/a/@href"
     title_query = '//h2/text()'
     body_query = "//div[@class='inner']"
     # Presumably caps how many index links are followed; confirm in Spider.
     max_links = 3
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_limit_count=max_links,
     )
Exemplo n.º 13
0
 def __init__(self):
     """Configure this spider for www.coder4.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://www.coder4.com/']
     link_query = "//h1[@class='entry-title']/a/@href"
     title_query = "//h1[@class='entry-title']/text()"
     body_query = "//div[@class='entry-content']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 14
0
 def __init__(self):
     """Configure this spider for the case listing on ued.baidu.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['http://ued.baidu.com/case']
     link_query = "//ul[@class='case-article-list']/li/a/@href"
     title_query = "//div[@class='article-title']/text()"
     body_query = "//div[@class='article-content']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 15
0
 def __init__(self):
     """Configure this spider for the aliyunfe/weekly GitHub repository.

     Forwards the start URL, three XPath expressions and the
     ``index_reverse`` flag to ``Spider.__init__``.
     """
     entry_pages = ['https://github.com/aliyunfe/weekly']
     # Only links whose text contains the marker substring are selected.
     link_query = "//a[contains(text(),'》第')]/@href"
     title_query = "//h2//*[@class='final-path']/text()"
     body_query = "//div[@id='readme']"
     # Presumably makes Spider walk the index links in reverse order;
     # confirm in Spider.
     reverse_links = True
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_reverse=reverse_links,
     )
Exemplo n.º 16
0
 def __init__(self):
     """Configure this spider for the blog at lucifer.ren.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://lucifer.ren/blog/']
     link_query = "//h2[@class='title']/a/@href"
     title_query = "//h1[@class='title']/a/text()"
     body_query = "//div[@class='article-entry']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 17
0
 def __init__(self):
     """Configure this spider for GitHub's monthly trending page.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://github.com/trending?since=monthly']
     link_query = '//*[@class="Box-row"]/h1/a/@href'
     # The title is taken from the Open Graph <meta> tag's content attribute.
     title_query = '//meta[@property="og:title"]/@content'
     body_query = '//div[@id="readme"]'
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 18
0
 def __init__(self):
     """Configure this spider for hiwannz.com.

     Forwards the start URL, three XPath expressions and an index limit to
     ``Spider.__init__``.
     """
     entry_pages = ['https://hiwannz.com/']
     link_query = "//h2[@class='post-title']/a/@href"
     title_query = "//h1[@class='post-title']/text()"
     body_query = "//div[@class='post-content']"
     # Presumably caps how many index links are followed; confirm in Spider.
     max_links = 2
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_limit_count=max_links,
     )
Exemplo n.º 19
0
 def __init__(self):
     """Configure this spider for www.alloyteam.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['http://www.alloyteam.com/page/0/']
     # a[2] picks the second <a> child of each matched parent.
     link_query = "//ul[@class='articlemenu']/li/a[2]/@href"
     title_query = "//div[@class='title1']/a[2]/text()"
     body_query = "//div[@class='content_banner']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 20
0
 def __init__(self):
     """Configure this spider for blog.miniasp.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://blog.miniasp.com/']
     # Link and title come from the same <h2 class="post-title"><a> node:
     # its href attribute and its text respectively.
     link_query = "//h2[@class='post-title']/a/@href"
     title_query = "//h2[@class='post-title']/a/text()"
     body_query = "//section[@class='post-body text']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 21
0
 def __init__(self):
     """Configure this spider for kernel.taobao.org.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://kernel.taobao.org']
     link_query = "//div[@class='article-title']/a/@href"
     title_query = "//h1[@class='post-title']/text()"
     body_query = "//div[@class='post-content']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 22
0
 def __init__(self):
     """Configure this spider for teddysun.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://teddysun.com']
     link_query = "//header/h2/a/@href"
     title_query = "//h1/a/text()"
     body_query = "//*[@class='article-content']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 23
0
 def __init__(self):
     """Configure this spider for the monthly-hottest feed on juejin.im.

     Forwards the start URL, three XPath expressions and the ``browser``
     flag to ``Spider.__init__``.
     """
     entry_pages = ['https://juejin.im/?sort=monthly_hottest']
     link_query = "//div[@class='entry-box']//a[@class='entry-link']/@href"
     title_query = "//h1[@class='article-title']/text()"
     body_query = "//div[@class='article-content']"
     # Presumably asks Spider to fetch pages through a real browser (e.g.
     # for JS-rendered content); confirm in Spider.
     use_browser = True
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         browser=use_browser,
     )
Exemplo n.º 24
0
 def __init__(self):
     """Configure this spider for yalishizhude.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['http://yalishizhude.com/']
     link_query = "//h1[@class='post-title']/a/@href"
     title_query = "//h1[@class='post-title']/text()"
     body_query = "//div[@class='post-body']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 25
0
 def __init__(self):
     """Configure this spider for blog.huoding.com.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://blog.huoding.com/']
     link_query = "//header/h1/a/@href"
     title_query = "//article/header/h1/text()"
     # Content <div> may sit at any depth below <article>.
     body_query = "//article//div[@class='entry-content']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 26
0
 def __init__(self):
     """Configure this spider for sofi.sh.

     Forwards the start URL and three XPath expressions to
     ``Spider.__init__``.
     """
     entry_pages = ['https://sofi.sh/']
     link_query = "//div[@class='block']/h2/a/@href"
     # NOTE(review): this matches the literal title attribute value
     # "page.attributes.title", which looks like an unrendered template
     # placeholder; confirm that the site really emits it verbatim.
     title_query = "//a[@title='page.attributes.title']/h1/text()"
     body_query = "//div[@itemprop='articleBody']"
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
     )
Exemplo n.º 27
0
 def __init__(self):
     """Configure this spider for the "pick" listing on www.cnblogs.com.

     Forwards the start URL, three XPath expressions and an index limit to
     ``Spider.__init__``.
     """
     entry_pages = ['https://www.cnblogs.com/pick/']
     # All three queries are anchored on element ids rather than classes.
     link_query = '//*[@id="post_list"]/div/div/h3/a/@href'
     title_query = '//*[@id="cb_post_title_url"]/text()'
     body_query = '//*[@id="cnblogs_post_body"]'
     # Presumably caps how many index links are followed; confirm in Spider.
     max_links = 4
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_limit_count=max_links,
     )
Exemplo n.º 28
0
 def __init__(self):
     """Configure this spider for the dt-fe/weekly GitHub repository.

     Forwards the start URL, three XPath expressions and the
     ``index_reverse`` flag to ``Spider.__init__``.
     """
     entry_pages = ['https://github.com/dt-fe/weekly']
     # Links whose text contains ".md", excluding any whose text contains
     # "readme.md" (the contains() comparison is case-sensitive).
     link_query = "//td[@class='content']//a[contains(text(),'.md') and not(contains(text(),'readme.md'))]/@href"
     title_query = "//h2//*[@class='final-path']/text()"
     body_query = '//article'
     # Presumably makes Spider walk the index links in reverse order;
     # confirm in Spider.
     reverse_links = True
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_reverse=reverse_links,
     )
Exemplo n.º 29
0
 def __init__(self):
     """Configure this spider for www.nginx.cn.

     Forwards the start URL, three XPath expressions and an index limit to
     ``Spider.__init__``.
     """
     entry_pages = ['http://www.nginx.cn']
     # Every query is scoped under <div class="post">.
     link_query = "//div[@class='post']/h2/a/@href"
     title_query = "//div[@class='post']/h1/text()"
     body_query = "//div[@class='post']/div[@class='content']"
     # Presumably caps how many index links are followed; confirm in Spider.
     max_links = 3
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_limit_count=max_links,
     )
Exemplo n.º 30
0
 def __init__(self):
     """Configure this spider for gityuan.com.

     Forwards the start URL, three XPath expressions and an index limit to
     ``Spider.__init__``.
     """
     entry_pages = ['http://gityuan.com']
     link_query = "//div[@class='post-preview']/a/@href"
     title_query = '//h1/text()'
     # Substring match on class, so extra classes on the container are OK.
     body_query = "//div[contains(@class, 'post-container')]"
     # Presumably caps how many index links are followed; confirm in Spider.
     max_links = 6
     Spider.__init__(
         self,
         start_urls=entry_pages,
         index_xpath=link_query,
         article_title_xpath=title_query,
         article_content_xpath=body_query,
         index_limit_count=max_links,
     )