示例#1
0
 def parse(self, response):
     """Extract, clean and store the article body of the crawled page.

     Reads the first ``div`` with id ``content_box`` and keeps the segment
     that follows the first ``</span></div></div>`` marker (up to the next
     such marker, if any); when the marker is absent the whole div is kept
     and ``error merkle`` is printed.  Everything from the first inline
     ``<script type="text/javascript">`` tag onward is cut, a fixed
     ad-loader snippet is removed, and the result is passed through
     ``fun.textPreprocessing``.  Finally the ``News`` row whose ``link``
     equals ``response.url`` is updated with the body, the bitcoin/ethereum
     flags and ``finished=True``.

     Problems are reported with ``print`` instead of being raised, except
     for errors coming from ``fun.textPreprocessing``, which propagate.
     """
     text = response.xpath('//div[@id = "content_box"]').extract_first()
     if text is None:
         # extract_first() yields None when no div matches; the string
         # operations below would fail on it, so report and stop here.
         print('error merkle')
         return

     # Same selection as ``split(marker)[1]``, with the missing-marker case
     # detected explicitly instead of through a catch-all ``except``.
     segments = text.split('</span></div></div>')
     if len(segments) > 1:
         text = segments[1]
     else:
         print('error merkle')

     text = text.split('<script type="text/javascript">')[0]

     # str.replace never raises when the substring is absent, so no guard
     # is needed around it.
     text = text.replace(
         'freestar.queue.push(function () { googletag.display(\'TheMerkle_728x90_320x50_BTF\'); });',
         '')

     # Run the project's preprocessing helper exactly once.
     text = fun.textPreprocessing(text)

     # NOTE(review): this looks like the same ad snippet in the form it has
     # after preprocessing stripped the punctuation -- confirm against
     # fun.textPreprocessing.
     text = text.replace(
         'freestar queue push function googletag display TheMerkle_728x90_320x50_BTF',
         ' ')

     try:
         News.update(
             body=text,
             bitcoinBoolean=fun.aboutBitcoin(text),
             ethereumBoolean=fun.aboutEthereum(text),
             finished=True).where(News.link == response.url).execute()
     except Exception as err:
         # Storage stays best-effort, but the cause is no longer hidden and
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         print('error storing: %r' % (err,))
示例#2
0
 def parse(self, response):
     """Save the preprocessed contents of the page's full-text div.

     Takes the first ``div`` whose class attribute is exactly
     ``post-full-text contents``, passes it through
     ``fun.textPreprocessing`` and updates the ``News`` row whose ``link``
     equals ``response.url`` with the body, the bitcoin/ethereum flags and
     ``finished=True``.
     """
     selector = '//div[@class = "post-full-text contents"]'
     body = fun.textPreprocessing(response.xpath(selector).extract_first())
     update = News.update(
         body=body,
         bitcoinBoolean=fun.aboutBitcoin(body),
         ethereumBoolean=fun.aboutEthereum(body),
         finished=True)
     update.where(News.link == response.url).execute()
 def parse(self, response):
     """Save the preprocessed ``rich-text`` div, without its tagline.

     Takes the first ``div`` whose class attribute is ``rich-text``, drops
     everything from the first ``<p class="tagline">`` onward, runs the
     remainder through ``fun.textPreprocessing`` and updates the ``News``
     row whose ``link`` equals ``response.url`` with the body, the
     bitcoin/ethereum flags and ``finished=True``.
     """
     raw_html = response.xpath('//div[@class = "rich-text"]').extract_first()
     # Keep only what precedes the tagline paragraph.
     before_tagline = raw_html.split('<p class="tagline">')[0]
     body = fun.textPreprocessing(before_tagline)
     update = News.update(
         body=body,
         bitcoinBoolean=fun.aboutBitcoin(body),
         ethereumBoolean=fun.aboutEthereum(body),
         finished=True)
     update.where(News.link == response.url).execute()
 def parse(self, response):
     """Join the ``name`` and ``clearfix content`` divs and store the result.

     Collects every ``div`` whose class attribute is ``name`` followed by
     every ``div`` whose class attribute is ``clearfix content``,
     concatenates their markup in that order, runs it through
     ``fun.textPreprocessing`` and updates the ``News`` row whose ``link``
     equals ``response.url`` with the body, the bitcoin/ethereum flags and
     ``finished=True``.
     """
     textParts = response.xpath('//div[@class = "name"]').extract()
     textParts = textParts + response.xpath(
         '//div[@class = "clearfix content"]').extract()
     # extract() already returns text strings, so a plain join replaces the
     # former per-part ``unicode()`` conversion, a builtin that does not
     # exist on Python 3; an empty list still yields ''.
     text = ''.join(textParts)
     text = fun.textPreprocessing(text)
     News.update(body=text,
                 bitcoinBoolean=fun.aboutBitcoin(text),
                 ethereumBoolean=fun.aboutEthereum(text),
                 finished=True).where(News.link == response.url).execute()
示例#5
0
    def parse(self, response):
        """Save the preprocessed ``entry-content`` div, trimmed at known markers.

        Takes the first ``div`` whose class attribute is ``entry-content``,
        cuts it at the first ``Disclaimer`` and then at the first
        ``CDATA id15 Content Ad 2 OA_show 15 `` occurrence, runs the
        remainder through ``fun.textPreprocessing`` and updates the ``News``
        row whose ``link`` equals ``response.url`` with the body, the
        bitcoin/ethereum flags and ``finished=True``.
        """
        selector = '//div[@class = "entry-content"]'
        fragment = response.xpath(selector).extract_first()
        # Each marker truncates the fragment at its first occurrence; a
        # marker that is absent leaves the fragment untouched.
        for marker in ('Disclaimer', 'CDATA id15 Content Ad 2 OA_show 15 '):
            fragment = fragment.split(marker)[0]
        body = fun.textPreprocessing(fragment)
        update = News.update(
            body=body,
            bitcoinBoolean=fun.aboutBitcoin(body),
            ethereumBoolean=fun.aboutEthereum(body),
            finished=True)
        update.where(News.link == response.url).execute()
示例#6
0
 def parse(self, response):
     """Save the preprocessed article container, trimmed at trailing markers.

     Takes the first ``div`` whose class attribute is
     ``article-content-container noskimwords`` and runs it through
     ``fun.textPreprocessing``.  The processed text is then cut, in order,
     at the first occurrence of each of a fixed list of markers.  The
     ``News`` row whose ``link`` equals ``str(response.url)`` is updated
     with the body, the bitcoin/ethereum flags and ``finished=True``.
     """
     selector = '//div[@class = "article-content-container noskimwords"]'
     body = fun.textPreprocessing(response.xpath(selector).extract_first())
     # Applied after preprocessing, in this order; an absent marker leaves
     # the text unchanged.
     cut_markers = (
         'function e t r n c a l',
         'image via ',
         'Image via ',
         'via Shutter',
     )
     for marker in cut_markers:
         body = body.split(marker)[0]
     update = News.update(
         body=body,
         bitcoinBoolean=fun.aboutBitcoin(body),
         ethereumBoolean=fun.aboutEthereum(body),
         finished=True)
     update.where(News.link == str(response.url)).execute()
示例#7
0
 def parse(self, response):
     """Extract the article text from the ``post-info`` div and store it.

     Takes the first ``div`` whose class attribute is ``post-info`` and
     selects the article portion: the third segment when the markup is
     split on ``<!--Content Ad -->``, otherwise the second segment when
     split on ``<li class="sm-share reddit">``.  If neither marker
     qualifies, ``error livebitcoinnews`` is printed and the whole div is
     kept.  The text is then cut at the first ``<footer class=`` and the
     first ``Header image``, run through ``fun.textPreprocessing`` and
     written to the ``News`` row whose ``link`` equals ``response.url``
     together with the bitcoin/ethereum flags and ``finished=True``.
     """
     text = response.xpath('//div[@class = "post-info"]').extract_first()
     if text is None:
         # extract_first() yields None when no div matches; the string
         # operations below would fail on it, so report and stop here.
         print('error livebitcoinnews')
         return

     # Explicit length checks stand in for indexing inside catch-all
     # ``except`` clauses, so unrelated errors are no longer swallowed.
     ad_segments = text.split('<!--Content Ad -->')
     if len(ad_segments) > 2:
         text = ad_segments[2]
     else:
         share_segments = text.split('<li class="sm-share reddit">')
         if len(share_segments) > 1:
             text = share_segments[1]
         else:
             print('error livebitcoinnews')

     text = text.split('<footer class=')[0]
     text = text.split('Header image')[0]
     text = fun.textPreprocessing(text)
     News.update(body=text,
                 bitcoinBoolean=fun.aboutBitcoin(text),
                 ethereumBoolean=fun.aboutEthereum(text),
                 finished=True).where(News.link == response.url).execute()