def unmatched_url(match, chan, db):
    """Describe a URL that no dedicated handler claimed.

    Fetches ``match`` and builds a one-line summary: the page title for HTML
    responses, or the content type and size for everything else, each
    followed by the URL's domain.

    Args:
        match: URL to fetch.
        chan: channel whose disabled-command list is looked up.
        db: database handle passed through to ``database.get``.

    Returns:
        The value of ``formatting.output('URL', [...])`` for the summary (or
        for the error text when the request itself fails). ``None`` when the
        server answers 404, or when the response is not HTML and
        ``'filesize'`` is among the channel's disabled commands.
    """
    disabled_commands = database.get(db, 'channels', 'disabled', 'chan', chan)

    try:
        # Without a timeout an unresponsive server would block the caller
        # indefinitely; a timeout surfaces through the same error path.
        r = requests.get(match, headers=headers, allow_redirects=True,
                         stream=True, timeout=8)
    except Exception as e:
        return formatting.output('URL', ['Error: {}'.format(e)])

    try:
        domain = urlparse(match).netloc
        if r.status_code == 404:
            return None

        # Not every response carries a Content-Type (e.g. bodiless replies).
        content_type = r.headers.get('Content-Type', '')

        if 'html' in content_type:
            # Only the head of the document is needed to find <title>, so
            # stop reading once roughly 48 KB have arrived.
            max_bytes = 48336
            chunks = []
            received = 0
            for chunk in r.iter_content(chunk_size=1024):
                chunks.append(chunk)
                received += len(chunk)
                if received > max_bytes:
                    break

            try:
                titles = html.fromstring(b''.join(chunks)).xpath('//title/text()')
            except Exception:
                # The parser (presumably lxml.html) rejects empty or
                # hopelessly broken documents; report those as untitled.
                titles = []
            if not titles:
                return formatting.output('URL', ['No Title ({})'.format(domain)])

            raw_title = titles[0]
            try:
                title_formatted = text.fix_bad_unicode(raw_title)
            except Exception:
                # Best effort: fall back to the title exactly as parsed.
                title_formatted = raw_title
            return formatting.output(
                'URL', ['{} ({})'.format(title_formatted, domain)])

        if disabled_commands and 'filesize' in disabled_commands:
            return None

        try:
            # A missing or empty header is treated like a negative length.
            length = int(r.headers.get('Content-Length') or -1)
        except (TypeError, ValueError):
            length = -1

        if length < 0:
            size_text = 'Unknown size'
        else:
            size_text = formatting.filesize(length)

        # The None check must come first: ``in`` on None raises TypeError.
        if size_text is None or '503 B' in size_text:
            size_text = ''

        return formatting.output(
            'URL', ['{} Size: {} ({})'.format(content_type, size_text, domain)])
    finally:
        # stream=True keeps the connection checked out until it is closed.
        r.close()
def unmatched_url(match, chan, db):
    """Describe a URL that no dedicated handler claimed.

    Fetches ``match`` and builds a one-line summary: the page title for HTML
    responses, or the content type and size for everything else, each
    followed by the URL's domain.

    Args:
        match: URL to fetch.
        chan: channel whose disabled-command list is looked up.
        db: database handle passed through to ``database.get``.

    Returns:
        The value of ``formatting.output('URL', [...])`` for the summary (or
        for the error text when the request itself fails). ``None`` when the
        server answers 404, or when the response is not HTML and
        ``'filesize'`` is among the channel's disabled commands.
    """
    disabled_commands = database.get(db, 'channels', 'disabled', 'chan', chan)

    try:
        # Without a timeout an unresponsive server would block the caller
        # indefinitely; a timeout surfaces through the same error path.
        r = requests.get(match, headers=headers, allow_redirects=True,
                         stream=True, timeout=8)
    except Exception as e:
        return formatting.output('URL', ['Error: {}'.format(e)])

    try:
        domain = urlparse(match).netloc
        if r.status_code == 404:
            return None

        # Not every response carries a Content-Type (e.g. bodiless replies).
        content_type = r.headers.get('Content-Type', '')

        if 'html' in content_type:
            # Only the head of the document is needed to find <title>, so
            # stop reading once roughly 48 KB have arrived.
            max_bytes = 48336
            chunks = []
            received = 0
            for chunk in r.iter_content(chunk_size=1024):
                chunks.append(chunk)
                received += len(chunk)
                if received > max_bytes:
                    break

            try:
                titles = html.fromstring(b''.join(chunks)).xpath('//title/text()')
            except Exception:
                # The parser (presumably lxml.html) rejects empty or
                # hopelessly broken documents; report those as untitled.
                titles = []
            if not titles:
                return formatting.output('URL', ['No Title ({})'.format(domain)])

            raw_title = titles[0]
            try:
                title_formatted = text.fix_bad_unicode(raw_title)
            except Exception:
                # Best effort: fall back to the title exactly as parsed.
                title_formatted = raw_title
            return formatting.output(
                'URL', ['{} ({})'.format(title_formatted, domain)])

        if disabled_commands and 'filesize' in disabled_commands:
            return None

        try:
            # A missing or empty header is treated like a negative length.
            length = int(r.headers.get('Content-Length') or -1)
        except (TypeError, ValueError):
            length = -1

        if length < 0:
            size_text = 'Unknown size'
        else:
            size_text = formatting.filesize(length)

        # The None check must come first: ``in`` on None raises TypeError.
        if size_text is None or '503 B' in size_text:
            size_text = ''

        return formatting.output(
            'URL', ['{} Size: {} ({})'.format(content_type, size_text, domain)])
    finally:
        # stream=True keeps the connection checked out until it is closed.
        r.close()
def message(post):
    """Format a post dict as a one-line summary string.

    Args:
        post: mapping providing the keys ``'rating'``, ``'id'``, ``'score'``,
            ``'file_url'``, ``'file_size'`` and ``'tags'``. ``'tags'`` is
            assumed to be a single space-separated string.

    Returns:
        A string of the form
        ``[#id] url (size) - Score: n - Rating: r - Tags: t``. The URL is
        passed through ``web.isgd`` and the size through
        ``formatting.filesize``. Tag strings longer than 80 characters are
        cut to their first 80 characters, followed by the number of tags
        that are not shown at all.
    """
    # \x02 and \x03N are the formatting bytes emitted around each label
    # (bold and colour in the usual IRC convention).
    rating_labels = {
        u'e': '\x02\x034NSFW\x03\x02',
        u'q': '\x02\x037questionable\x03\x02',
        u's': '\x02\x033safe\x03\x02',
    }
    rating = rating_labels.get(post['rating'], 'unknown')

    post_id = '\x02#{}\x02'.format(post['id'])
    score = post['score']
    url = web.isgd(post['file_url'])
    size = formatting.filesize(post['file_size'])

    tags = post['tags']
    if len(tags) > 80:
        shown = tags[:80]
        # Tags that start inside the visible prefix count as shown (even if
        # cut mid-word); the rest are the ones reported as "more".
        hidden = len(tags.split()) - len(shown.split())
        tags = '{}... (and {} more)'.format(shown, hidden)

    return "[{}] {} ({}) - Score: {} - Rating: {} - Tags: {}".format(
        post_id, url, size, score, rating, tags)
def _unescape_entities(title):
    """Decode HTML character references (``&quot;``, ``&amp;``, ...) in title."""
    try:
        from html import unescape  # Python 3.4+
    except ImportError:
        from HTMLParser import HTMLParser  # Python 2
        unescape = HTMLParser().unescape
    try:
        return unescape(title)
    except UnicodeError:
        # Python 2 byte strings holding non-ASCII data cannot always be
        # mixed with decoded entities; keep the title as it was.
        return title


def _describe_file_type(content_type):
    """Return a short label (with trailing comma) for a non-HTML content type."""
    # very common mime types
    if 'image/' in content_type:
        return content_type.replace('image/', '') + ' image,'
    if 'video/' in content_type:
        return content_type.replace('video/', '') + ' video,'
    if 'text/' in content_type:
        return 'text file,'
    if content_type == 'application/octet-stream':
        return 'binary file,'
    if 'audio/' in content_type:
        return 'audio file,'
    # other mime types
    if 'application/vnd.' in content_type:
        return 'unknown binary file,'
    if 'font/' in content_type:
        return 'font,'
    # i dunno
    return content_type + ' file,'


def unmatched_url(url, parsed, bot, chan, db):
    """Describe a URL that no dedicated handler claimed.

    Fetches ``url`` and returns a line starting with ``[URL]``: the page
    title for HTML responses, or a file-type label and size for anything
    else, followed by the domain and, for 4xx/5xx answers, the status code.

    Args:
        url: URL to fetch.
        parsed: parsed form of ``url``; only its ``netloc`` is read.
        bot: unused here; kept so the signature stays the same for callers.
        chan: channel whose disabled-command list is looked up.
        db: database handle passed through to ``database.get``.

    Returns:
        The summary string, or ``None`` when URL titles are disabled for the
        channel, the fetch fails, the page is a Cloudflare challenge, or the
        response is not HTML and ``'filesize'`` is disabled.
    """
    disabled_commands = database.get(db, 'channels', 'disabled', 'chan', chan) or []

    # don't bother if the channel has url titles disabled
    if 'urltitles' in disabled_commands:
        return None

    # fetch, and hide all errors from the output
    try:
        req = requests.get(url, headers=headers, allow_redirects=True,
                           stream=True, timeout=8)
    except Exception as e:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('[!] WARNING couldnt fetch url')
        print(e)
        return None

    try:
        domain = parsed.netloc
        content_type = req.headers.get('Content-Type', '')
        output = ['[URL]']

        if 'html' in content_type:
            try:
                title = parse_html(req)
            except Exception:
                print('[!] WARNING the url caused a parser error')
                title = 'Untitled'

            # Only pay for entity decoding when the title can contain one.
            if '&' in title and ';' in title:
                title = _unescape_entities(title)

            # Cloudflare challenge pages carry no useful title.
            if 'Attention Required! | Cloudflare' in title:
                return None

            output.append(title)
        else:
            if 'filesize' in disabled_commands:
                return None

            output.append(_describe_file_type(content_type))

            try:
                size = int(req.headers.get('Content-Length', 0))
            except (TypeError, ValueError):
                # Header values are strings, so a malformed one raises
                # ValueError rather than TypeError.
                size = 0

            # some pages send exactly 503 or 513 bytes of empty padding as an
            # internet explorer 5 and 6 workaround, but since that browser is
            # super dead this should probably be removed.
            # more at https://stackoverflow.com/a/11544049/4301778
            if size <= 0 or size in (503, 513):
                output.append('unknown size')
            else:
                output.append('size: ' + formatting.filesize(size))

        # output formatting
        line = ' '.join(output)
        if len(line) > MAX_LENGTH:
            line = line[:MAX_LENGTH] + '...'

        # add domain to the end
        line = "{} ({})".format(line, domain)

        # show error codes if they appear
        if 400 <= req.status_code < 600:
            line = '{} (error {})'.format(line, req.status_code)

        return line
    finally:
        # stream=True keeps the connection checked out until it is closed.
        req.close()
def convert_kilobytes(kilobytes):
    """Format a size given in kilobytes as human-readable text.

    The value is scaled to bytes (x1024), rendered by
    ``formatting.filesize``, and every ``'iB'`` in the result is replaced
    with ``'B'`` (presumably turning suffixes such as ``KiB`` into ``KB``).
    """
    byte_count = kilobytes * 1024
    rendered = formatting.filesize(byte_count)
    return rendered.replace('iB', 'B')