Exemplo n.º 1
0
 def __init__(self, bot):
     """Set up empty lookup tables and helper objects for ``bot``."""
     # Helpers: the owning bot, a URL opener and a lock object.
     self.bot = bot
     self.lock = threading.Lock()
     self.myopener = MyOpener()

     # Index state: every table starts empty and the counter starts at 1.
     self.index_json = {}
     self.reverse_hash_index = {}
     self.reverse_image_index = {}
     self.next_index = 1
Exemplo n.º 2
0
def get_image_hash(url):
    """Download the image behind an imgur URL and return its difference hash.

    The path component of ``url`` (without its leading slash) is handed to
    ``ImgurFetcher.get_image_url`` to obtain the address that is downloaded
    into a temporary file under ``/tmp``.

    Returns the ``imagehash.dhash`` value as a string, or ``None`` when the
    download or the hashing fails.
    """
    file_name = '/tmp/' + get_random_file_name()
    imgur = ImgurFetcher()
    myopener = MyOpener()

    if not url.startswith('http'):
        # Prefix scheme-less input so urlparse() separates host from path.
        url = '//' + url
    parsed = urlparse(url)
    imgur_url = imgur.get_image_url(parsed.path[1:])

    try:
        myopener.retrieve(imgur_url, file_name)
        return str(imagehash.dhash(Image.open(file_name)))
    except Exception:
        # Best effort: any download/decoding problem means "no hash".
        return None
    finally:
        # The file does not exist if the download failed early; an
        # unguarded os.remove() here would raise and replace the return
        # value above with an OSError.
        try:
            os.remove(file_name)
        except OSError:
            pass
Exemplo n.º 3
0
class XkcdFetcher(object):
    """Map xkcd-related URLs to cached comic JSON.

    Three in-memory tables are filled by ``_load_reverse``:

    * ``index_json``: comic id -> decoded comic JSON
    * ``reverse_image_index``: path of the comic's ``img`` URL -> comic id
    * ``reverse_hash_index``: stored image hash -> comic id
    """

    def __init__(self, bot):
        """Create an empty fetcher bound to ``bot``."""
        self.bot = bot
        self.next_index = 1  # next comic id that _load_reverse will read
        self.reverse_image_index = {}
        self.reverse_hash_index = {}
        self.index_json = {}
        self.myopener = MyOpener()
        self.lock = threading.Lock()

    def get_json(self, url):
        """Return the comic JSON that ``url`` refers to, or ``None``.

        Recognised hosts are ``imgs.xkcd.com`` (looked up by image path),
        ``xkcd.com`` with a numeric path (looked up by comic id) and
        ``imgur.com`` (looked up through ``bot.imgur_lookup`` and the hash
        index). A result found through imgur is returned as a copy carrying
        ``from_external = True`` so the cached entry is not modified.
        """
        if not url.startswith('http'):
            # Prefix scheme-less input so urlparse() separates host from path.
            url = '//' + url
        parsed = urlparse(url)

        if re.match(r'^(www\.)?imgs\.xkcd\.com$', parsed.netloc):
            if parsed.path not in self.reverse_image_index:
                self._load_reverse()
            comic_id = self.reverse_image_index.get(parsed.path)
            return self.index_json.get(comic_id) if comic_id else None

        id_match = re.match(r'^/(\d+)/?$', parsed.path)
        if id_match and re.match(r'^(www\.)?xkcd\.com$', parsed.netloc):
            comic_id = int(id_match.group(1))
            if comic_id not in self.index_json:
                self._load_reverse()
            return self.index_json.get(comic_id) if comic_id else None

        if re.match(r'^imgur\.com$', parsed.netloc):
            image_hash = self.bot.imgur_lookup.get(url)
            if image_hash:
                if image_hash not in self.reverse_hash_index:
                    self._load_reverse()
                comic_id = self.reverse_hash_index.get(image_hash)
                cached = self.index_json.get(comic_id) if comic_id else None
                if not cached:
                    return cached
                # Flag a copy: writing into the cached dict would leak the
                # marker into later lookups made through xkcd.com URLs.
                flagged = dict(cached)
                flagged['from_external'] = True
                return flagged

        return None

    def get_explained_link(self, comic_id):
        """Return ``XKCD_EXPLAINED_URL`` formatted with ``comic_id``."""
        return XKCD_EXPLAINED_URL.format(comic_id=comic_id)

    def _load_reverse(self):
        """Extend the indexes from ``next_index`` until no metadata is left.

        For each comic id the stored metadata is read; when it is missing an
        insert is attempted and the read is retried. The loop stops at the
        first id for which no metadata can be obtained.
        """
        # Using the lock as a context manager guarantees it is released even
        # when opening the data store connection fails.
        with self.lock:
            data_store = self.bot._get_new_data_store_connection()
            try:
                while True:
                    meta = self._get_meta(data_store, self.next_index)
                    if not meta:
                        self._insert_meta(data_store, self.next_index)
                        meta = self._get_meta(data_store, self.next_index)
                    if not meta:
                        return

                    if meta[1]:  # json
                        comic = json.loads(meta[1])
                        self.index_json[self.next_index] = comic
                        parsed = urlparse(comic.get('img', ''))
                        # Keep the first comic seen for a given image path.
                        if parsed.path and parsed.path not in self.reverse_image_index:
                            self.reverse_image_index[parsed.path] = self.next_index
                    # Keep the first comic seen for a given hash.
                    if meta[3] and meta[3] not in self.reverse_hash_index:  # hash_avg
                        self.reverse_hash_index[meta[3]] = self.next_index

                    self.next_index += 1
            finally:
                data_store.close()

    def _get_meta(self, data_store, comic_id):
        """Return ``data_store.get_xkcd_meta(comic_id)``."""
        return data_store.get_xkcd_meta(comic_id)

    def _insert_meta(self, data_store, comic_id):
        """Fetch comic ``comic_id`` and store its JSON and image hashes.

        Nothing is stored when the comic JSON cannot be fetched. Hashes that
        cannot be computed are stored as empty strings.
        """
        j = self._get_xkcd_json(comic_id)
        hash_avg = ''
        hash_d = ''
        hash_p = ''
        if not j:
            return
        if j.get('img'):
            file_name = '/tmp/' + get_random_file_name()
            try:
                self.myopener.retrieve(j.get('img'), file_name)
                image = Image.open(file_name)
                hash_avg = imagehash.average_hash(image)
                hash_d = imagehash.dhash(image)
                hash_p = imagehash.phash(image)
            except Exception:
                # Best effort: keep whichever hashes were computed so far.
                pass
            finally:
                # The file is absent when the download failed; removing it
                # must not abort the surrounding index load.
                try:
                    os.remove(file_name)
                except OSError:
                    pass
        data_store.insert_xkcd_meta(comic_id, json.dumps(j), str(hash_avg), str(hash_d), str(hash_p))

    def _get_xkcd_json(self, comic_id):
        """Return the decoded JSON for ``comic_id``, or ``None`` on failure.

        Comic 404 is answered with a fixed placeholder dict without making
        a request.
        """
        if int(comic_id) == 404:
            return {'title': '404', 'transcript': '404', 'alt': '404'}

        try:
            response = urllib2.urlopen(XKCD_JSON_API_URL.format(comic_id=comic_id))
            try:
                html = response.read()
            finally:
                response.close()
            return json.loads(html)
        except Exception:
            return None