Exemplo n.º 1
0
#

from controllers.util.database import db, getComics
from controllers.util.scraper import ComicScraper
from datetime import date

# Daily scrape driver (Python 2: uses the print statement).
#
# For each comic returned by getComics('working') this script:
#   1. prints a progress line,
#   2. skips the comic if a strip dated today is already stored,
#   3. builds a ComicScraper for the comic,
#   4. looks up a previously stored strip URL and asks the scraper whether
#      that same image is still present on the page.

# Today's date as an ISO-8601 string (YYYY-MM-DD); bound into the SQL below.
today = date.today().isoformat()
# presumably 'working' selects the comics whose scraping currently works --
# TODO confirm against getComics()
comics = getComics('working')
total = len(comics)
# 1-based progress counter, incremented at the top of every iteration.
i = 0
for comic in comics: 
  i += 1 
  # Progress line: position, total, and percentage of comics handled so far.
  print "Processing ", i, "/", total, "[" + str( (float(i)/float(total))*100.0 )+"%]" 
  # Test to see if the comic is already in the DB
  # NOTE(review): the comic id is concatenated into the SQL text while the
  # date is passed as a bound parameter; consider binding both values.
  strips = db.query('select id from strips where comic_id=' + str(comic['id']) + ' and date = %s', today)
  if len(strips) > 0:
    # NOTE(review): comic is read by key above (comic['id']) and by attribute
    # here (comic.name); this needs a row type that supports both -- confirm.
    print "Today's " + comic.name + " is already in the database, skipping."
    continue
    
  # Scrape the comic image from the comic website
  scraper = ComicScraper(comic)

  # Check for same image on page
  # NOTE(review): url is selected next to MAX(date) without a GROUP BY, so it
  # is not guaranteed to come from the row holding the latest date; an
  # aggregate query may also return a single row (with NULLs) even when the
  # comic has no strips, which would make the length check below always
  # true -- verify against the database engine in use.
  strips = db.query('select MAX(date), url from strips where comic_id=' + str(comic['id']) + ';')
  if len(strips) > 0:
    print "Checking for known image..."
    # Only the first returned row is consulted.
    url = strips[0]['url']
    
    # contentHasImage(url) is treated as a truthy/falsy test for whether the
    # stored image URL is found by the scraper.
    if scraper.contentHasImage(url):
      print "Same image as last scrape, skipping."
Exemplo n.º 2
0
 # Body of one iteration of an enclosing loop (the `continue` statements below
 # target that loop). The names i, top, comic_id, comic_name and db are
 # expected to be defined before this point.
 # Python 2 syntax: uses the print statement.

 # Build a scraper for the fixed "latest page" URL of the site.
 url = 'http://lfgcomic.com/page/latest'
 scraper = ComicScraper({'site_url':url, 'comic_url': u'0'})
 
 # 1-based progress counter and progress line (position, total, percentage).
 i += 1 
 print "Processing ", i, "/", top, "[" + str( (float(i)/float(top))*100.0 )+"%]" 
 image = scraper.findComicImage()
 print image
 
 
 # Skip this iteration when the image found is the 'rack-header.jpg' banner
 # rather than a comic strip.
 # NOTE(review): image is subscripted unconditionally; if findComicImage()
 # can return None this line raises -- confirm.
 if ('rack-header.jpg') in image['src']:
   print 'header: BUST!\n'
   continue
   
 # Check for same image on page
 # comic_id is concatenated directly, so it must already be a string here.
 # NOTE(review): the SQL is built by string concatenation; consider a bound
 # parameter instead.
 strips = db.query('select url from strips where comic_id=' + comic_id + ';')
 if len(strips) > 0:
   # Becomes True once any stored strip URL is found by the scraper.
   found = False
   
   print "Checking for known image..."
   for urlIn in strips:
     print urlIn
     # `not found` short-circuits, so contentHasImage() is not called again
     # after the first match; remaining rows are still printed.
     if not found and scraper.contentHasImage(urlIn['url']):
       print "Same image as last scrape, skipping.\n"
       found = True
       # This continue only advances the inner loop over strips.
       continue
   # Skip the rest of the outer iteration when a known image was found.
   if found:
     continue
     
   # NOTE(review): this print is nested inside `if len(strips) > 0`, so
   # nothing is announced when the comic has no stored strips -- confirm the
   # indentation is intended.
   print "Scraping " + comic_name
 # If the scrape was successful pop it into the strips DB