Пример #1
0
 def __init__(self):
     super().__init__()
     if not self.__net:
         self.__net = Net()
     if not self._users:
         self._users = SOUPUSERS
     LOGGER.info('new suppenkasper instance')
Пример #2
0
class Suppenkasper(object):

    __net = None
    _users = None
    __scrape = None

    # Vielen Dank an Frank für diese Awesome Regex!
    __img_rx = r'(url|src)="(http://asset-.\.soup\.io/asset/\d{4}/.{4}_.{4})(_.*)?\.(jpeg|jpg|gif|png)'
    __nxt_rx = r'SOUP.Endless.next_url.*/(since/\d*)'

    def __init__(self):
        super().__init__()
        if not self.__net:
            self.__net = Net()
        if not self._users:
            self._users = SOUPUSERS
        LOGGER.info('new suppenkasper instance')

    def __nextsince(self):
        for line in self.__scrape:
            if research(self.__nxt_rx, line):
                return research(self.__nxt_rx, line).group(1)
        return ''

    def __pageimages(self):
        for line in self.__scrape:
            imagesearch = research(self.__img_rx, line)
            if imagesearch and not research('square', imagesearch.group(0)):
                yield '%s.%s' % (imagesearch.group(2), imagesearch.group(4))
        return ''

    def _usercrawl(self, user, loops=SOUPPAGES):
        since = ''
        for loop in range(loops):
            url = 'http://{user}.soup.io/{since}'.format(user=user, since=since)
            self.__scrape = self.__net.url_scrape(url, split=True)
            since = self.__nextsince()
            LOGGER.info('finished page %d/%d for %s' %(loop+1, loops, user))
            for image in self.__pageimages():
                yield image

    def kasper(self):
        load = list()
        for user in self._users:
            LOGGER.info('kasper %s' %(user))
            for image in self._usercrawl(user):
                if image.split('/')[-1] not in RDB.get_all_images():
                    load.append(image)
                    yield user, image
        tpool(self.__net.download_image, load)
Пример #3
0
 def __init__(self, redis_opt):
     super().__init__()
     if not self.__rdb:
         self.__rdb = StrictRedis(
             host=redis_opt["host"],
             port=redis_opt["port"],
             db=redis_opt["db"],
             decode_responses=redis_opt["decode_responses"],
         )
         self.__redis_opt = redis_opt
         LOGGER.info("created new redis connection")
     if not self.__files:
         self.__files = Files()
     if not self.__net:
         self.__net = Net()
     if not self.__proc:
         self.__proc = Cycle(redis_opt["image_timeout"], self.next_image).start()
         pass
Пример #4
0
class Redabas(object):
    """docstring for redabas"""

    __rdb = None
    __redis_opt = None
    __files = None
    __net = None
    __proc = None

    def __init__(self, redis_opt):
        super().__init__()
        if not self.__rdb:
            self.__rdb = StrictRedis(
                host=redis_opt["host"],
                port=redis_opt["port"],
                db=redis_opt["db"],
                decode_responses=redis_opt["decode_responses"],
            )
            self.__redis_opt = redis_opt
            LOGGER.info("created new redis connection")
        if not self.__files:
            self.__files = Files()
        if not self.__net:
            self.__net = Net()
        if not self.__proc:
            self.__proc = Cycle(redis_opt["image_timeout"], self.next_image).start()
            pass

    def redis_ping(self):
        if self.__rdb:
            try:
                LOGGER.info("redis ping")
                return self.__rdb.ping()
            except RedisConnectionError as ex:
                LOGGER.error("could not ping redis: %s" % (ex))

        return False

    def get_ropt(self, field):
        if field in self.__redis_opt.keys():
            return self.__redis_opt[field]

    def flush_all(self):
        rdbfields = list()
        for folder in self.__files.get_contentsub(full=False):
            rdbfields.append("%s:%s" % (self.__redis_opt["image_prefix"], folder))
        rdbfields.append("%s:feed" % (self.__redis_opt["status_prefix"]))

        for entry in rdbfields:
            self.__rdb.delete(entry)
            LOGGER.info("flushed data for %s" % (entry))

    #

    def get_images(self, folder="public"):
        """gets images from redis"""
        result = list()
        rdbfield = "%s:%s" % (self.__redis_opt["image_prefix"], folder)

        def __readin():
            """reloads db"""
            self.__rdb.delete(rdbfield)
            for image in self.__files.find_images(folder=folder):
                self.__rdb.rpush(rdbfield, image)
                result.append(image)
            LOGGER.info("rebuilt redis image cache for %s" % (rdbfield))
            return result

        if folder in self.__files.get_contentsub():
            result = sorted(self.__rdb.lrange(rdbfield, 0, -1))
            return result if result else __readin()

    def __dblocate_image(self, name):
        for folder in self.__files.get_contentsub(full=False):
            if name in self.get_images(folder):
                return folder

    def locate_image(self, name):
        """locates images"""
        folder = self.__dblocate_image(name)
        if folder:
            LOGGER.info("found requested image %s in folder %s" % (name, folder))
            image = self.__files.jinja_static_file(name, folder=folder)
            if image:
                return image
        LOGGER.info("requested image %s not found" % (name))
        return self.__files.jinja_nullimg()

    def get_imagestats(self):
        """counts images"""
        result = dict()
        for folder in self.__files.get_contentsub(full=False):
            result[folder] = len(self.get_images(folder=folder))
        return result

    def get_all_images(self):
        """suppenkasper needs a list of all images"""
        result = list()
        for folder in self.__files.get_contentsub(full=False):
            result += self.get_images(folder=folder)
        return result

    def get_dict_images(self, folder):
        result = dict()
        if folder in self.__files.get_contentsub(full=False):
            for image in sorted(self.get_images(folder=folder)):
                result[image] = self.locate_image(image)
            return result

    def get_sort_images(self, folder="unsorted", page=0):
        """batch of images to sort"""
        result = dict()
        if folder in self.__files.get_contentsub(full=False):
            for image in sorted(self.get_images(folder=folder))[
                page * self.__redis_opt["sort_slices"] : page * self.__redis_opt["sort_slices"]
                + self.__redis_opt["sort_slices"]
            ]:
                result[image] = self.locate_image(image)
            return result

    def random_image(self, folder="public"):
        """just one of those images"""
        if folder in self.__files.get_contentsub(full=False):
            images = self.get_images(folder)
            if images:
                result = self.locate_image(choice(images))
                return result if result else self.__files.jinja_nullimg()

    def move_image(self, name, target):
        """moves images"""
        folder = self.__dblocate_image(name)
        if folder and target in self.__files.get_contentsub(full=False):
            sourcefile = self.__files.jinja_static_file(name, folder=folder, full=True)
            targetfile = self.__files.path_join(target, name)
            if sourcefile and targetfile:
                rdbsourcefield = "%s:%s" % (self.__redis_opt["image_prefix"], folder)
                rdbtargetfield = "%s:%s" % (self.__redis_opt["image_prefix"], target)
                self.__rdb.rpush(rdbtargetfield, name)
                self.__rdb.lrem(rdbsourcefield, 0, name)
                self.__files.file_rename(sourcefile, targetfile)
                print("<- %s\n-> %s\n[ %s %s ]\n\n" % (sourcefile, targetfile, rdbsourcefield, rdbtargetfield))

    #

    def get_status(self):
        """gets status from redis"""
        result = dict()
        rdbfield = "%s:feed" % (self.__redis_opt["status_prefix"])

        def __readin():
            """reloads db"""
            self.__rdb.delete(rdbfield)
            result = self.__net.url_scrape(self.__redis_opt["status_url"])
            if result:
                self.__rdb.set(rdbfield, result)
                self.__rdb.expire(rdbfield, self.__redis_opt["status_expire"])
                LOGGER.info(
                    "rebuilt redis status cache for %s, again after %i seconds"
                    % (rdbfield, self.__redis_opt["status_expire"])
                )
                return loads(result)

        result = self.__rdb.get(rdbfield)
        return loads(result) if result else __readin()

    #

    def browser_shout(self, channel):
        pubsub = self.__rdb.pubsub()
        pubsub.subscribe(channel)
        for event in pubsub.listen():
            LOGGER.info("shouting to browser channel:%s message:%s" % (channel, event["data"]))
            if event["type"] == "message":
                strdata = "data: %s\r\n\r\n" % (event["data"].replace("\n", "<br />"))
                yield strdata.encode("UTF-8")

    def redis_shout(self, channel, message):
        LOGGER.info("shouting to redis channel:%s message:%s" % (channel, message))
        self.__rdb.publish(channel, message)
        return message

    #

    def next_image(self):
        """shout next random image from public to redis"""
        image = self.random_image(folder="public")
        if image:
            LOGGER.info("shouting next image: %s" % (image))
            self.redis_shout(self.__redis_opt["image_pubsub"], image)
            print("shouting next image: %s" % (image))