Пример #1
0
def main():
    """Re-process selected rows of the ``fatwallet`` table.

    Opens a connection to the ``resys`` database on ``localhost``, reads
    every row of ``fatwallet`` and, for each row whose ``url`` either
    contains ``?start=0`` or is a hot-deals forum URL ending in a numeric
    id, passes the row to ``output`` and then to ``save`` (together with
    the open connection).
    """
    host = 'localhost'
    database = 'resys'
    user = '******'
    password = ''
    db = Connection(host, database, user, password)

    # Compiled once, outside the loop.  The dots in the host name are
    # escaped so that only the literal "www.fatwallet.com" is accepted;
    # left unescaped, each dot would match any character.
    forum_url = re.compile(
        r'^http://www\.fatwallet\.com/forums/hot-deals/[0-9]+/$')

    for row in db.query('select * from fatwallet'):
        url = row.url
        if '?start=0' in url or forum_url.match(url) is not None:
            output(row)
            save(row, db)
Пример #2
0
class FatwalletSpider(Spider):
    """Spider that also records every saved page in a database table.

    On top of the base ``Spider`` behaviour, each page handed to
    ``saveHTML`` is inserted into the ``fatwallet`` table through the
    connection created in ``__init__``.
    """

    def __init__(self, beginurl, reg, priorcontent, pages, downloadFolder,
                 host, database, user=None, password=None):
        """Initialise the base spider, then open the database connection.

        The first five arguments are forwarded unchanged to the base
        class; ``host``, ``database``, ``user`` and ``password`` are
        passed to ``Connection`` and the result is kept as ``self.db``.
        """
        parent = super(FatwalletSpider, self)
        parent.__init__(beginurl, reg, priorcontent, pages, downloadFolder)
        self.db = Connection(host, database, user, password)

    def saveHTML(self, url, html, path):
        """Store the page in the database, then delegate to the base class.

        The row written contains the URL, the untouched HTML, a copy of
        the HTML restricted to characters found in ``string.printable``,
        and the value returned by ``self.md5(html)``.

        NOTE(review): ``path`` is accepted but never used, and it is not
        forwarded to the base ``saveHTML`` -- confirm this is intended.
        """
        digest = self.md5(html)
        printable_only = ''.join(ch for ch in html if ch in string.printable)
        self.insertHTML2DB('fatwallet', url, html, printable_only, digest)
        super(FatwalletSpider, self).saveHTML(url, html)

    def insertHTML2DB(self, TABLE, URL, RAW, TEXT, MD5):
        """Insert one page record into ``TABLE`` via ``self.db.insert``.

        The values are passed as the keyword arguments ``url``,
        ``raw_html``, ``text_html`` and ``md5``.
        """
        self.db.insert(
            TABLE,
            url=URL,
            raw_html=RAW,
            text_html=TEXT,
            md5=MD5,
        )
Пример #3
0
 def __init__(self, beginurl, reg, priorcontent, pages, downloadFolder,
              host, database, user=None, password=None):
     """Initialise the base spider, then open the database connection.

     The first five arguments are forwarded unchanged to the base class
     initialiser; ``host``, ``database``, ``user`` and ``password`` are
     passed to ``Connection`` and the result is stored as ``self.db``.
     """
     parent = super(FatwalletSpider, self)
     parent.__init__(beginurl, reg, priorcontent, pages, downloadFolder)
     connection = Connection(host, database, user, password)
     self.db = connection