Example #1
0
def initialize():
    ''' One-time application setup: check the DB, load config, create dirs.

    Returns False when already initialized, True after a successful run.
    '''
    global __INITIALIZED__

    # Fast path: setup already ran in this process.
    if __INITIALIZED__:
        return False

    with INIT_LOCK:
        db_check()

        if load_config():
            # Directories that must exist under DATA_DIR.
            dirs = [COMIC_DIR, LOG_DIR, COMIC_INT, COMIC_STATIC]

            # Echo the loaded settings so startup state is visible in the log.
            opts_list = [
                WEB_PORT, LOG_DIR, COMIC_DIR, COMIC_DB, COMIC_INT, COMIC_STATIC
            ]

            for opt in opts_list:
                # NOTE(review): assumes every option is a string -- confirm.
                logger.log(u'Loaded: ' + opt)

            for folder in dirs:
                dir_check(os.path.join(DATA_DIR, folder))

        # Set the flag while still holding the lock so two threads cannot
        # both run the body above (previously set outside the lock).
        __INITIALIZED__ = True

    return True
Example #2
0
    def action(self, query, args=None):
        ''' Execute *query* (optionally with *args*) and commit.

        Retries up to 5 times, sleeping 1s between attempts, when SQLite
        reports the file as locked or unopenable; any other database error
        is re-raised.  Returns the cursor from execute() (None when *query*
        is None or all retries were exhausted).
        '''
        with db_lock:

            if query is None:
                return

            sqlResult = None
            attempt = 0

            while attempt < 5:
                try:
                    if args is None:
                        logger.log(self.filename+": "+query)
                        sqlResult = self.connection.execute(query)
                    else:
                        logger.log(self.filename+": "+query+" with args "+str(args))
                        sqlResult = self.connection.execute(query, args)
                    self.connection.commit()
                    # get out of the connection attempt loop since we were successful
                    break
                except sqlite3.OperationalError as e:
                    # str(e) replaces the deprecated e.message attribute.
                    if "unable to open database file" in str(e) or "database is locked" in str(e):
                        # Transient lock -- back off and retry.
                        attempt += 1
                        time.sleep(1)
                    else:
                        raise
                except sqlite3.DatabaseError:
                    # Fatal error executing the query -- let the caller see it.
                    raise

            # Bug fix: the result was previously computed and dropped.
            return sqlResult
Example #3
0
 def StopAllTasks(self):
     ''' Signal every registered background task to stop, then join it. '''
     for task in comicstrip.TASK_LIST:
         logger.log(u'Stopping task ' + str(task))
         task.stop()
         # join() blocks until the task's run loop notices the stop flag.
         task.join()
         logger.log(u'Stopped')
Example #4
0
    def action(self, query, args=None):
        ''' Execute *query* (optionally with *args*) and commit.

        Retries up to 5 times, sleeping 1s between attempts, when SQLite
        reports the file as locked or unopenable; any other database error
        is re-raised.  Returns the cursor from execute() (None when *query*
        is None or all retries were exhausted).
        '''
        with db_lock:

            if query is None:
                return

            sqlResult = None
            attempt = 0

            while attempt < 5:
                try:
                    if args is None:
                        logger.log(self.filename+": "+query)
                        sqlResult = self.connection.execute(query)
                    else:
                        logger.log(self.filename+": "+query+" with args "+str(args))
                        sqlResult = self.connection.execute(query, args)
                    self.connection.commit()
                    # get out of the connection attempt loop since we were successful
                    break
                except sqlite3.OperationalError as e:
                    # str(e) replaces the deprecated e.message attribute.
                    if "unable to open database file" in str(e) or "database is locked" in str(e):
                        # Transient lock -- back off and retry.
                        attempt += 1
                        time.sleep(1)
                    else:
                        raise
                except sqlite3.DatabaseError:
                    # Fatal error executing the query -- let the caller see it.
                    raise

            # Bug fix: the result was previously computed and dropped.
            return sqlResult
Example #5
0
    def upsert(self, tableName, valueDict, keyDict):
        ''' UPDATE the row matching *keyDict* with *valueDict*; when no row
        changed, INSERT a new row carrying both sets of columns.

        NOTE(review): column names come straight from the dict keys, so
        callers must not pass untrusted keys (the values ARE parameterised).
        '''
        logger.log(u'TB: ' + str(tableName) + 'val: ' + str(valueDict) + 'key: ' + str(keyDict))
        changesBefore = self.connection.total_changes

        def gen_params(myDict):
            # "col = ?" placeholder fragment per column.
            return [col + " = ?" for col in myDict]

        query = ("UPDATE " + tableName + " SET " + ", ".join(gen_params(valueDict)) +
                 " WHERE " + " AND ".join(gen_params(keyDict)))

        # list() wrappers make this work on Python 3 dict views as well as
        # Python 2 lists.
        self.action(query, list(valueDict.values()) + list(keyDict.values()))

        # total_changes unchanged -> the UPDATE matched nothing; INSERT instead.
        if self.connection.total_changes == changesBefore:
            all_keys = list(valueDict.keys()) + list(keyDict.keys())
            query = ("INSERT INTO " + tableName + " (" + ", ".join(all_keys) + ")" +
                     " VALUES (" + ", ".join(["?"] * len(all_keys)) + ")")
            self.action(query, list(valueDict.values()) + list(keyDict.values()))
Example #6
0
    def upsert(self, tableName, valueDict, keyDict):
        ''' UPDATE the row matching *keyDict* with *valueDict*; when no row
        changed, INSERT a new row carrying both sets of columns.

        NOTE(review): column names come straight from the dict keys, so
        callers must not pass untrusted keys (the values ARE parameterised).
        '''
        logger.log(u'TB: ' + str(tableName) + 'val: ' + str(valueDict) + 'key: ' + str(keyDict))
        changesBefore = self.connection.total_changes

        def gen_params(myDict):
            # "col = ?" placeholder fragment per column.
            return [col + " = ?" for col in myDict]

        query = ("UPDATE " + tableName + " SET " + ", ".join(gen_params(valueDict)) +
                 " WHERE " + " AND ".join(gen_params(keyDict)))

        # list() wrappers make this work on Python 3 dict views as well as
        # Python 2 lists.
        self.action(query, list(valueDict.values()) + list(keyDict.values()))

        # total_changes unchanged -> the UPDATE matched nothing; INSERT instead.
        if self.connection.total_changes == changesBefore:
            all_keys = list(valueDict.keys()) + list(keyDict.keys())
            query = ("INSERT INTO " + tableName + " (" + ", ".join(all_keys) + ")" +
                     " VALUES (" + ", ".join(["?"] * len(all_keys)) + ")")
            self.action(query, list(valueDict.values()) + list(keyDict.values()))
Example #7
0
    def __init__(self, **kwargs):
        ''' Configure the worker thread from keyword arguments.

        Recognised kwargs: action, cycleTime, args, runImmediatly.
        '''
        for option in ('action', 'cycleTime', 'args', 'runImmediatly'):
            setattr(self, option, kwargs.get(option))
        self.running = 1

        # An ancient lastRun timestamp (year 1) makes the first cycle fire
        # immediately; otherwise the timer starts counting from now.
        self.lastRun = (datetime.datetime.fromordinal(1)
                        if self.runImmediatly
                        else datetime.datetime.now())

        threading.Thread.__init__(self)
        logger.log(u'Thread Init: ' + str(threading.current_thread().name))
Example #8
0
def page_find(comic_url):
    ''' Fetch *comic_url* and return the absolute URL of its "next" link.

    Returns None when the page cannot be fetched, when the only matching
    link points back at the same page (trailing '#'), or when no anchor
    matches "next" at all.
    '''
    try:
        parse_page = bs(requests.get(comic_url).text)
    except Exception:
        # Was a bare except: keep the best-effort behaviour but stop
        # swallowing SystemExit/KeyboardInterrupt.  (The old handler also
        # set a dead local before returning.)
        return None

    parsed_url = urlparse.urlparse(comic_url)

    # Hoisted out of the loop; the patterns themselves are unchanged.
    next_link = re.compile(".*next.*", re.IGNORECASE)
    same_page = re.compile('.*#$.*')

    # First find all the links on the page
    for links in parse_page.findAll(['a']):
        if next_link.match(str(links)):
            if links['href'].lower().startswith('http'):
                # Absolute link -- use it as-is.
                next_page = links['href']
                logger.log(u'CUR: ' + comic_url + ' NEXT: ' + next_page)
                return next_page
            if parsed_url.path and not parsed_url.query:
                parts = list(parsed_url)
                # Catch those stupid # references to the same page
                if not same_page.match(links['href']):
                    parts[2] = links['href']
                    next_page = urlparse.urlunparse(parts)
                    logger.log(u'CUR: ' + comic_url + ' NEXT: ' + next_page)
                else:
                    next_page = None
            else:
                logger.log(u'DEBUG: ' + str(parsed_url) + ' ' + str(links['href']))
                parts = list(parsed_url)
                # Catch those stupid # references to the same page
                if not same_page.match(links['href']):
                    if parsed_url.path.strip('/') not in links['href']:
                        # Treat the href as a replacement query string.
                        parts[4] = links['href'].strip('?')
                    else:
                        parts[2] = links['href']
                        parts[4] = None
                    next_page = urlparse.urlunparse(parts)
                    logger.log(u'CUR: ' + comic_url + ' NEXT: ' + next_page)
                else:
                    next_page = None
            return next_page
    else:
        # for/else: runs only when no anchor matched (no break/return fired).
        logger.log(u'FAILED: ' + comic_url)
        return None
Example #9
0
def load_config():
    ''' Load settings from CFG_FILE into the module-level globals.

    Returns True on success; on any failure writes a fresh default config
    via save_config() and returns False (previously an implicit None).
    '''
    global WEB_PORT, WEB_HOST, LOG_DIR, COMIC_DIR, COMIC_DB, COMIC_INT, COMIC_STATIC

    conf = ConfigParser()
    conf.read(CFG_FILE)
    try:
        WEB_PORT = conf.get('General', 'web_port')
        WEB_HOST = conf.get('General', 'web_host')
        LOG_DIR = conf.get('General', 'log_dir')
        COMIC_DIR = conf.get('General', 'comic_dir')
        COMIC_INT = conf.get('General', 'comic_int')
        COMIC_STATIC = conf.get('General', 'comic_static')
        COMIC_DB = conf.get('General', 'comic_db')
    except Exception:
        # Narrowed from a bare except: a missing section/option (or an
        # unreadable file) falls through to writing the default config.
        logger.log(u'Could not load config file creating default file')
        save_config()
        return False
    return True
Example #10
0
def load_config():
    ''' Load settings from CFG_FILE into the module-level globals.

    Returns True on success; on any failure writes a fresh default config
    via save_config() and returns False (previously an implicit None).
    '''
    global WEB_PORT, WEB_HOST, LOG_DIR, COMIC_DIR, COMIC_DB, COMIC_INT, COMIC_STATIC

    conf = ConfigParser()
    conf.read(CFG_FILE)
    try:
        WEB_PORT = conf.get('General', 'web_port')
        WEB_HOST = conf.get('General', 'web_host')
        LOG_DIR = conf.get('General', 'log_dir')
        COMIC_DIR = conf.get('General', 'comic_dir')
        COMIC_INT = conf.get('General', 'comic_int')
        COMIC_STATIC = conf.get('General', 'comic_static')
        COMIC_DB = conf.get('General', 'comic_db')
    except Exception:
        # Narrowed from a bare except: a missing section/option (or an
        # unreadable file) falls through to writing the default config.
        logger.log(u'Could not load config file creating default file')
        save_config()
        return False
    return True
Example #11
0
def grab_strip(comic_id, outpath, strip_no, current_url, replace=False):
    ''' Download the comic strip image found on *current_url*.

    Scans the page's <img> tags and saves the first sufficiently large
    jpg/png/gif under COMIC_DIR/outpath as "<strip_no>.<ext>", returning a
    dict for the comic_strips table.  When no image qualifies, returns a
    record whose location is 'SKIPPED'.
    NOTE(review): *replace* is accepted but never used -- confirm intent.
    '''

    # Mutable copy of the URL parts, reused to resolve relative img srcs.
    parsed = list(urlparse.urlparse(current_url))
    soup = bs(requests.get(current_url).text)

    # Loop through all the images soup finds
    for image in soup.findAll('img'):
        filename = image['src'].split('/')[-1]
        if filename.endswith(('.jpg','.png','.gif')):
           if image['src'].lower().startswith('http'):
              strip_img = requests.get(image["src"])
           else:
              # Relative src: swap in the path and drop any query string.
              parsed[2] = image['src']
              parsed[4] = None
              strip_img = requests.get(urlparse.urlunparse(parsed))


           if strip_img.status_code == requests.codes.ok:
              s = StringIO(strip_img.content)
              strip = Image.open(s)
              w,h = strip.size

              # Size heuristic to skip banners/icons; parses as
              # (w > 249 and h > 320) or (w > 320 and h > 249), i.e. the
              # image must exceed ~250x320 in either orientation.
              if w > 249 and h > 320 or w > 320 and h > 249:
                 filename = "%s%s" % (strip_no,os.path.splitext(filename)[-1])
                 save_path = os.path.join(comicstrip.COMIC_DIR, outpath)
                 path_exists(save_path)
                 save_path = os.path.join(save_path, filename)
                 db_path = os.path.join(outpath, filename)
                 logger.log(u'PAGE: ' + current_url + 'IMAGE: ' + filename)

                 # Check if image exists already and skip saving
                 if not os.path.exists(save_path):
                    strip.save(save_path)
                 else:
                    logger.log(u'FOUND IMAGE: ' + save_path)

                 # First qualifying image wins; record its DB-relative path.
                 return { 'strip_no': strip_no, 'page_url': current_url, 'location': db_path }
    else:
       # for/else: reached only when no image qualified (no return fired).
       return { 'strip_no': strip_no, 'page_url': current_url, 'location': 'SKIPPED' }
Example #12
0
def initialize():
    ''' One-time application setup: check the DB, load config, create dirs.

    Returns False when already initialized, True after a successful run.
    '''
    global __INITIALIZED__

    # Fast path: setup already ran in this process.
    if __INITIALIZED__:
        return False

    with INIT_LOCK:
        db_check()

        if load_config():
            # Directories that must exist under DATA_DIR.
            dirs = [COMIC_DIR, LOG_DIR, COMIC_INT, COMIC_STATIC]

            # Echo the loaded settings so startup state is visible in the log.
            opts_list = [WEB_PORT, LOG_DIR, COMIC_DIR, COMIC_DB, COMIC_INT, COMIC_STATIC]

            for opt in opts_list:
                # NOTE(review): assumes every option is a string -- confirm.
                logger.log(u'Loaded: ' + opt)

            for folder in dirs:
                dir_check(os.path.join(DATA_DIR, folder))

        # Set the flag while still holding the lock so two threads cannot
        # both run the body above (previously set outside the lock).
        __INITIALIZED__ = True

    return True
Example #13
0
def update_engine(comic_id=None, que=None):
    ''' Crawl comics for new pages and upsert each discovered strip.

    When *comic_id* is given only that comic is processed, otherwise every
    row of comic_list.  *que* is accepted for interface compatibility but
    is unused.
    '''
    # Connect to the db
    myDB = db.DBConnection(row_type="dict")

    # Parameterised query: the previous version interpolated comic_id
    # directly into the SQL string.
    if comic_id is not None:
        rows = myDB.select('SELECT id,path,first_page,end_page FROM comic_list WHERE id = (?)', (comic_id,))
    else:
        rows = myDB.select('SELECT id,path,first_page,end_page FROM comic_list')

    for info in rows:
        # Maps strip number -> page URL for this comic's crawl.
        url_list = dict()
        # Page where we cut off looking for next pages (may be empty).
        end_page = info['end_page']

        last_url = myDB.select('SELECT strip_no,page_url FROM comic_strips WHERE comic_id = (?) ORDER BY strip_no DESC LIMIT 1', (info['id'],))
        logger.log(u'LAST URL: ' + str(last_url))
        if last_url:
            # Resume from the page after the newest stored strip.
            page_url = page_find(last_url[0]['page_url'])
            strip_no = last_url[0]['strip_no']
        else:
            page_url = info['first_page']
            strip_no = 0

        # Walk "next" links until exhausted, a page repeats (cycle guard),
        # or the configured end page is reached.
        while page_url is not None and page_url not in url_list.values() and page_url != end_page:
            strip_no += 1
            url_list[strip_no] = page_url
            page_url = page_find(page_url)
            logger.log(u'URL LIST: ' + str(len(url_list)))

        for strip_no in url_list.keys():
            myDB.upsert('comic_strips', grab_strip(info['id'], info['path'], strip_no, url_list[strip_no]), { 'comic_id': info['id'], 'strip_no': strip_no })
    myDB.connection.close()
Example #14
0
def update_engine(comic_id=None, que=None):
    ''' Crawl comics for new pages and upsert each discovered strip.

    When *comic_id* is given only that comic is processed, otherwise every
    row of comic_list.  *que* is accepted for interface compatibility but
    is unused.
    '''
    # Connect to the db
    myDB = db.DBConnection(row_type="dict")

    # Parameterised query: the previous version interpolated comic_id
    # directly into the SQL string.
    if comic_id is not None:
        rows = myDB.select('SELECT id,path,first_page,end_page FROM comic_list WHERE id = (?)', (comic_id,))
    else:
        rows = myDB.select('SELECT id,path,first_page,end_page FROM comic_list')

    for info in rows:
        # Maps strip number -> page URL for this comic's crawl.
        url_list = dict()
        # Page where we cut off looking for next pages (may be empty).
        end_page = info['end_page']

        last_url = myDB.select('SELECT strip_no,page_url FROM comic_strips WHERE comic_id = (?) ORDER BY strip_no DESC LIMIT 1', (info['id'],))
        logger.log(u'LAST URL: ' + str(last_url))
        if last_url:
            # Resume from the page after the newest stored strip.
            page_url = page_find(last_url[0]['page_url'])
            strip_no = last_url[0]['strip_no']
        else:
            page_url = info['first_page']
            strip_no = 0

        # Walk "next" links until exhausted, a page repeats (cycle guard),
        # or the configured end page is reached.
        while page_url is not None and page_url not in url_list.values() and page_url != end_page:
            strip_no += 1
            url_list[strip_no] = page_url
            page_url = page_find(page_url)
            logger.log(u'URL LIST: ' + str(len(url_list)))

        for strip_no in url_list.keys():
            myDB.upsert('comic_strips', grab_strip(info['id'], info['path'], strip_no, url_list[strip_no]), { 'comic_id': info['id'], 'strip_no': strip_no })
    # Leak fix: this variant never closed its connection.
    myDB.connection.close()
Example #15
0
    def run(self):
        ''' Thread main loop: invoke self.action once per cycleTime.

        Checks the stop flag and sleeps one second between polls; an
        exception from the action is logged and then re-raised (ending
        the thread).
        '''
        logger.log(u'Thread Started: ' + str(threading.current_thread().name))

        while True:
            currentTime = datetime.datetime.now()

            if currentTime - self.lastRun > self.cycleTime:
                logger.log(u'Running task: '  + str(threading.current_thread().name))
                self.lastRun = currentTime
                try:
                    if self.args is not None:
                        self.action(self.args)
                    else:
                        self.action()

                except Exception as e:
                    # Log before re-raising: previously the log line sat
                    # after a bare `raise` and was unreachable, and
                    # u'...' + e would have raised TypeError anyway.
                    logger.log(u'Exception generated in thread ' + str(e))
                    raise

            if not self.running:
                return

            time.sleep(1)
Example #16
0
 def stop(self):
     ''' Ask the run() loop to exit at its next one-second poll. '''
     logger.log(u'Sending stop signal')
     self.running = 0  # run() reads this flag between cycles