Exemplo n.º 1
0
	def __init__(self, threads, smtpSession, smtpLock, feedItem):
		'''Init ifanr info.

		Forwards every argument unchanged to BaseSpider.__init__ and then
		sets the ifanr-specific regular expressions and URL prefix.
		'''
		# Parenthesised so the line parses on both Python 2 and Python 3.
		print('''Init ifanr info.''')
		BaseSpider.__init__(self, threads, smtpSession, smtpLock, feedItem)
		# Two groups: the numeric article id from the link and the headline text.
		self.pattern1 = r'<a rel="external" href="http://www.ifanr.com/(\d+?)" title=".+?">.*?<span itemprop="headline">(.+?)</span>.*?</a>'
		# Second group is the articleBody content; the empty first group is
		# presumably there so the pattern still exposes two groups -- confirm
		# against how BaseSpider consumes pattern2.
		self.pattern2 = r'<div itemprop="articleBody">()(.+?)</div>'
		# NOTE(review): no urlSuffix is set here; presumably BaseSpider supplies
		# a default -- confirm.
		self.urlPrefix = '/'
Exemplo n.º 2
0
	def __init__(self, threads, smtpSession, smtpLock, feedItem):
		'''Init cnbeta info.

		Forwards every argument unchanged to BaseSpider.__init__ and then
		sets the cnbeta-specific regular expressions and URL affixes.
		'''
		# Parenthesised so the line parses on both Python 2 and Python 3.
		print('''Init cnbeta info.''')
		BaseSpider.__init__(self, threads, smtpSession, smtpLock, feedItem)
		# Two groups: the numeric article id from the link and the link text.
		self.pattern1 = r'<div class="title">.+?<a target="_blank" href="/articles/(\d+?).htm">(.+?)</a>'
		# Three groups: the introduction paragraph, the content block, and the
		# marker (iframe or clearing div) that terminates the content.
		self.pattern2 = r'<div class="introduction">.+?<p>(.+?)</p>.+?<div class="content">(.+?)(<iframe class="tigerstock"|<div class="clear")'
		# Presumably wrapped around the article id to form the article URL --
		# confirm against BaseSpider.
		self.urlPrefix = '/articles/'
		self.urlSuffix = '.htm'
Exemplo n.º 3
0
	def __init__(self, threads, smtpSession, smtpLock, feedItem):
		'''Init 36kr info.

		Forwards every argument unchanged to BaseSpider.__init__ and then
		sets the 36kr-specific regular expressions and URL affixes.
		'''
		# Parenthesised so the line parses on both Python 2 and Python 3.
		print('''Init 36kr info.''')
		BaseSpider.__init__(self, threads, smtpSession, smtpLock, feedItem)
		# Two groups: the numeric post id from the link and the link text.
		self.pattern1 = r'<a class="title info_flow_news_title".+?href="/p/(\d+?).html" target="_blank">(.+?)</a>'
		# Two groups: the post title (h1) and the article section content.
		self.pattern2 = r'<h1 class="single-post__title">(.+?)</h1>.*?<section class="article".+?>(.+?)</section>'
		# Presumably wrapped around the post id to form the post URL --
		# confirm against BaseSpider.
		self.urlPrefix = '/p/'
		self.urlSuffix = '.html'
Exemplo n.º 4
0
 def __init__(self):
     """Run BaseSpider's initialiser, then set ``login_status`` to True."""
     BaseSpider.__init__(self)
     # NOTE(review): the spider starts out flagged as logged in; presumably
     # this target needs no login step -- confirm against BaseSpider.
     self.login_status = True
Exemplo n.º 5
0
 def __init__(self):
     """Initialise via BaseSpider and mark ``login_status`` as True."""
     BaseSpider.__init__(self)
     # Assigned after the base initialiser so it is not overwritten by it.
     # NOTE(review): presumably means no login is required -- confirm.
     self.login_status = True
Exemplo n.º 6
0
 def __init__(self):
     """Initialise via BaseSpider and set the acm.hust.edu.cn vjudge login URL."""
     BaseSpider.__init__(self)
     # Endpoint stored for later use; presumably the login request is sent
     # here by BaseSpider or a sibling method -- confirm.
     self.login_url = 'http://acm.hust.edu.cn/vjudge/user/login.action'
Exemplo n.º 7
0
 def __init__(self):
     """Initialise via BaseSpider and set the poj.org login URL."""
     BaseSpider.__init__(self)
     # Endpoint stored for later use; presumably the login request is sent
     # here by BaseSpider or a sibling method -- confirm.
     self.login_url = 'http://poj.org/login'
Exemplo n.º 8
0
 def __init__(self):
     """Initialise via BaseSpider and set the acm.bnu.edu.cn login URL."""
     BaseSpider.__init__(self)
     # Endpoint stored for later use; presumably the login request is sent
     # here by BaseSpider or a sibling method -- confirm.
     self.login_url = 'http://acm.bnu.edu.cn/v3/ajax/login.php'
Exemplo n.º 9
0
 def __init__(self):
     """Initialise via BaseSpider and set the acm.zju.edu.cn login URL.

     ``status_url`` is also created, as an empty string.
     """
     BaseSpider.__init__(self)
     self.login_url = 'http://acm.zju.edu.cn/onlinejudge/login.do'
     # NOTE(review): left empty here; presumably filled in later or unused
     # for this site -- confirm.
     self.status_url = ''
Exemplo n.º 10
0
 def __init__(self):
     """Run BaseSpider's initialiser, then record the poj.org login endpoint."""
     BaseSpider.__init__(self)
     # Assigned after the base initialiser so it is not overwritten by it.
     self.login_url = 'http://poj.org/login'
Exemplo n.º 11
0
 def __init__(self, idList=None, threadId=0):
     """Initialise the spider by delegating to BaseSpider.__init__.

     Args:
         idList: list of ids handed to BaseSpider; when omitted (or None) a
             new empty list is created for this instance.
         threadId: identifier forwarded to BaseSpider; defaults to 0.
     """
     # A literal ``[]`` default would be one list object shared by every
     # instance constructed without arguments, so build a fresh one per call.
     if idList is None:
         idList = []
     BaseSpider.__init__(self, idList, threadId)
Exemplo n.º 12
0
 def __init__(self):
     """Initialise the instance by delegating to BaseSpider.__init__.

     No additional state is set here.
     """
     BaseSpider.__init__(self)
Exemplo n.º 13
0
 def __init__(self):
     """Run BaseSpider's initialiser, then record the acm.bnu.edu.cn login endpoint."""
     BaseSpider.__init__(self)
     # Assigned after the base initialiser so it is not overwritten by it.
     self.login_url = 'http://acm.bnu.edu.cn/v3/ajax/login.php'
Exemplo n.º 14
0
 def __init__(self):
     """Initialise via BaseSpider and set the acm.hdu.edu.cn login URL."""
     BaseSpider.__init__(self)
     # The ``action=login`` query parameter is part of the stored endpoint.
     self.login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login'
Exemplo n.º 15
0
 def __init__(self):
     """Run BaseSpider's initialiser, then record the acm.hdu.edu.cn login endpoint."""
     BaseSpider.__init__(self)
     # Assigned after the base initialiser so it is not overwritten by it.
     self.login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login'
Exemplo n.º 16
0
        BaseSpider.__init__(self, idList, threadId)

    def genUrl(self, id):
        '''Build the eastmoney request URL (TYPE=wk, extend=kdj) for one id.

        The id is embedded followed by a one-digit suffix: ``2`` when the id
        starts with ``3`` or ``0``, otherwise ``1`` (presumably a market
        selector -- confirm against the API).  The callback name is
        ``fsDataTeacma`` followed by the current time in milliseconds.

        Args:
            id: stock code; converted with ``str()``, so numeric codes are
                accepted as well as strings.

        Returns:
            The request URL as a string.  The URL is also printed.
        '''
        # Example request URLs kept for reference:
        # http://pdfm.eastmoney.com/EM_UBG_PDTI_Fast/api/js?token=4f1862fc3b5e77c150a2b985b12db0fd&rtntype=6&id=0000022&type=wk&authorityType=fa&cb=jsonp1539234497088
        # http://pdfm2.eastmoney.com/EM_UBG_PDTI_Fast/api/js?id=0000022&TYPE=k&js=fsDataTeacma((x))&rtntype=4&extend=kdj&check=kte&authorityType=fa&fsDataTeacma=fsDataTeacma
        code = str(id)
        callback = 'fsDataTeacma' + str(int(round(time.time() * 1000)))
        # startswith() with a tuple avoids the IndexError that indexing [0]
        # would raise on an empty code.
        if code.startswith(('3', '0')):
            url = 'http://pdfm2.eastmoney.com/EM_UBG_PDTI_Fast/api/js?id=$ID$2&TYPE='
        else:
            url = 'http://pdfm2.eastmoney.com/EM_UBG_PDTI_Fast/api/js?id=$ID$1&TYPE='
        url += 'wk&js=$FUN$((x))&rtntype=4&extend=kdj&check=kte&authorityType=fa&$FUN$=$FUN$'
        # Substitute the string form of the id: str.replace() rejects
        # non-string replacement values.
        url = url.replace('$ID$', code).replace('$FUN$', callback)
        print(url)
        return url


if __name__ == '__main__':
    # Script entry point: split the id list into contiguous chunks and start
    # one KdjWkSpider per chunk.

    threads = 50  # upper bound on the number of spiders started
    idList = BaseSpider.getIdList()

    # Called once on a throw-away instance before any worker is started.
    KdjWkSpider().initDir()

    # Ceiling division: floor division dropped the trailing
    # len(idList) % threads ids and produced step == 0 (every chunk empty)
    # whenever there were fewer ids than threads.
    step = (len(idList) + threads - 1) // threads
    for threadId in range(1, threads + 1):
        subIdList = idList[((threadId - 1) * step):(threadId * step)]
        if not subIdList:
            # All ids have been handed out; no point starting idle spiders.
            break
        spider = KdjWkSpider(subIdList, threadId)
        spider.start()
Exemplo n.º 17
0
 def __init__(self):
     """Initialise via BaseSpider and set the uva.onlinejudge.org login URL."""
     BaseSpider.__init__(self)
     # The query string (option=com_comprofiler, task=login) is part of the
     # stored endpoint.
     self.login_url = 'http://uva.onlinejudge.org/index.php?option=com_comprofiler&task=login'
Exemplo n.º 18
0
 def __init__(self):
     """Delegate construction entirely to BaseSpider.__init__."""
     # No spider-specific attributes are assigned here.
     BaseSpider.__init__(self)
Exemplo n.º 19
0
 def __init__(self):
     """Run BaseSpider's initialiser, then set ``login_url`` and ``status_url``.

     ``login_url`` points at acm.zju.edu.cn; ``status_url`` is an empty string.
     """
     BaseSpider.__init__(self)
     self.login_url = 'http://acm.zju.edu.cn/onlinejudge/login.do'
     # NOTE(review): empty placeholder; presumably no status page is
     # configured for this site yet -- confirm.
     self.status_url = ''
Exemplo n.º 20
0
 def __init__(self):
     """Run BaseSpider's initialiser, then record the uva.onlinejudge.org login endpoint."""
     BaseSpider.__init__(self)
     # Assigned after the base initialiser so it is not overwritten by it.
     self.login_url = 'http://uva.onlinejudge.org/index.php?option=com_comprofiler&task=login'
Exemplo n.º 21
0
 def __init__(self):
     """Run BaseSpider's initialiser, then record the acm.hust.edu.cn vjudge login endpoint."""
     BaseSpider.__init__(self)
     # Assigned after the base initialiser so it is not overwritten by it.
     self.login_url = 'http://acm.hust.edu.cn/vjudge/user/login.action'