예제 #1
0
파일: urls.py 프로젝트: MrW24/wbot
def unmatched_url(match, chan, db):
    """Build a short description of the resource behind ``match``.

    The URL is fetched (following redirects) and summarised through
    ``formatting.output('URL', [...])``:

    * HTML responses give ``"<title> (<domain>)"``, or
      ``"No Title (<domain>)"`` when no ``<title>`` text can be extracted.
    * Any other content type gives
      ``"<content type> Size: <size> (<domain>)"``, unless ``'filesize'`` is
      among the channel's disabled commands.

    Args:
        match: URL to fetch; also parsed to obtain the domain.
        chan: channel whose disabled-command entry is looked up.
        db: database handle passed through to ``database.get``.

    Returns:
        The value produced by ``formatting.output``, or ``None`` when the
        server answers 404 or file-size reporting is disabled for ``chan``.
        A failed request is reported as ``"Error: <exception>"`` rather than
        raised.
    """
    disabled_commands = database.get(db, 'channels', 'disabled', 'chan', chan)

    try:
        r = requests.get(match,
                         headers=headers,
                         allow_redirects=True,
                         stream=True)
    except Exception as e:
        # Best effort: surface the fetch failure as output instead of raising.
        return formatting.output('URL', ['Error: {}'.format(e)])

    # With stream=True the connection is only released once the body has been
    # fully consumed or the response is closed, so close it on every exit.
    try:
        domain = urlparse(match).netloc

        if r.status_code == 404:
            return None

        # Servers may omit Content-Type; use a placeholder instead of
        # letting a KeyError escape.
        content_type = r.headers.get('Content-Type', 'Unknown type')

        if 'html' in content_type:
            # Only the start of the document is needed to find <title>, so
            # stop reading once a little over 48336 bytes have arrived.
            chunks = []
            received = 0
            for chunk in r.iter_content(chunk_size=1024):
                chunks.append(chunk)
                received += len(chunk)
                if received > 48336:
                    break

            # Parsing an empty/garbled body and a page without <title> are
            # both reported as "No Title" rather than crashing the handler.
            try:
                body = html.fromstring(b''.join(chunks))
                title = body.xpath('//title/text()')[0]
            except Exception:
                return formatting.output('URL',
                                         ['No Title ({})'.format(domain)])

            # Unicode clean-up is optional; keep the raw title if it fails.
            try:
                title_formatted = text.fix_bad_unicode(title)
            except Exception:
                title_formatted = title
            return formatting.output(
                'URL', ['{} ({})'.format(title_formatted, domain)])

        if disabled_commands and 'filesize' in disabled_commands:
            return None

        # A missing, empty, non-numeric or negative Content-Length all mean
        # the size is unknown.
        try:
            size = int(r.headers.get('Content-Length'))
        except (TypeError, ValueError):
            size = -1
        length = formatting.filesize(size) if size >= 0 else 'Unknown size'

        # Suppress the "503 B" size (kept from the original behaviour).
        if '503 B' in length:
            length = ''
        return formatting.output(
            'URL',
            ['{} Size: {} ({})'.format(content_type, length, domain)])
    finally:
        r.close()
예제 #2
0
파일: urls.py 프로젝트: Anonymike/pasta-bot
def unmatched_url(match, chan, db):
    """Summarise the resource found at ``match``.

    Fetches the URL (redirects followed) and returns a line built with
    ``formatting.output('URL', [...])``:

    * for HTML: ``"<title> (<domain>)"``, falling back to
      ``"No Title (<domain>)"`` when no title text can be read;
    * for everything else: ``"<content type> Size: <size> (<domain>)"``,
      skipped when ``'filesize'`` is in the channel's disabled commands.

    Args:
        match: URL to fetch and to derive the domain from.
        chan: channel used to look up disabled commands.
        db: database handle forwarded to ``database.get``.

    Returns:
        The ``formatting.output`` result, or ``None`` for a 404 response or
        when file-size output is disabled.  Request failures are returned as
        an ``"Error: ..."`` line, not raised.
    """
    disabled_commands = database.get(db, 'channels', 'disabled', 'chan', chan)

    try:
        r = requests.get(match, headers=headers, allow_redirects=True,
                         stream=True)
    except Exception as e:
        # Best effort: show the failure instead of propagating it.
        return formatting.output('URL', ['Error: {}'.format(e)])

    # A streamed response holds its connection until it is fully read or
    # closed; the finally clause guarantees the latter on every return path.
    try:
        domain = urlparse(match).netloc

        if r.status_code == 404:
            return None

        # Content-Type is optional in a response; avoid a KeyError.
        content_type = r.headers.get('Content-Type', 'Unknown type')

        if 'html' not in content_type:
            if disabled_commands and 'filesize' in disabled_commands:
                return None

            # Absent, empty, malformed or negative Content-Length values are
            # all treated as an unknown size.
            try:
                size = int(r.headers.get('Content-Length'))
            except (TypeError, ValueError):
                size = -1
            if size < 0:
                length = 'Unknown size'
            else:
                length = formatting.filesize(size)

            # Suppress the "503 B" size (kept from the original behaviour).
            if '503 B' in length:
                length = ''
            return formatting.output(
                'URL',
                ['{} Size: {} ({})'.format(content_type, length, domain)])

        # The <title> lives near the top of the page, so read just over
        # 48336 bytes at most instead of downloading the whole document.
        pieces = []
        total = 0
        for chunk in r.iter_content(chunk_size=1024):
            pieces.append(chunk)
            total += len(chunk)
            if total > 48336:
                break

        # An unparsable (e.g. empty) body and a page lacking <title> both
        # end up as "No Title" rather than an unhandled exception.
        try:
            body = html.fromstring(b''.join(pieces))
            title = body.xpath('//title/text()')[0]
        except Exception:
            return formatting.output('URL', ['No Title ({})'.format(domain)])

        # Unicode repair is optional; fall back to the raw title on failure.
        try:
            title_formatted = text.fix_bad_unicode(title)
        except Exception:
            title_formatted = title
        return formatting.output(
            'URL', ['{} ({})'.format(title_formatted, domain)])
    finally:
        r.close()
예제 #3
0
def message(post):
    """Format a post dictionary as a single summary line.

    Args:
        post: mapping that provides the keys ``'rating'``, ``'id'``,
            ``'score'``, ``'file_url'``, ``'file_size'`` and ``'tags'``.
            ``'tags'`` is treated as one space-separated string.

    Returns:
        ``"[<id>] <short url> (<size>) - Score: <score> - Rating: <rating>
        - Tags: <tags>"``.  Tag strings longer than 80 characters are cut to
        their first 80 characters and followed by the number of tags that
        were cut off entirely.
    """
    # Labels are wrapped in \x02 / \x03<n> control codes (these look like
    # IRC bold/colour markers); anything unrecognised is plain 'unknown'.
    rating_labels = {
        u'e': '\x02\x034NSFW\x03\x02',
        u'q': '\x02\x037questionable\x03\x02',
        u's': '\x02\x033safe\x03\x02',
    }
    rating = rating_labels.get(post['rating'], 'unknown')

    post_id = '\x02#{}\x02'.format(post['id'])
    score = post['score']
    url = web.isgd(post['file_url'])
    size = formatting.filesize(post['file_size'])

    tags = post['tags']
    if len(tags) > 80:
        # A tag is completely hidden when it starts at index 80 or later,
        # i.e. when the space in front of it sits at index 79 or later.
        # Counting every space in the string would instead report (roughly)
        # the total number of tags, including the ones still displayed.
        hidden = tags.count(' ', 79)
        tags = '{}... (and {} more)'.format(tags[:80], hidden)

    return "[{}] {} ({}) - Score: {} - Rating: {} - Tags: {}".format(
        post_id, url, size, score, rating, tags)
예제 #4
0
def unmatched_url(url, parsed, bot, chan, db):
    """Build a one-line ``[URL] ...`` summary of the resource behind ``url``.

    HTML responses are summarised by the title returned from ``parse_html``;
    every other content type is summarised as a rough file kind plus its
    size.  The domain is appended, and 4xx/5xx status codes are shown.

    Args:
        url: address to fetch.
        parsed: object exposing ``netloc``; presumably ``urlparse(url)`` --
            confirm against the caller.
        bot: not used by this function.
        chan: channel whose disabled-command list is looked up.
        db: database handle handed to ``database.get``.

    Returns:
        The summary string, or ``None`` when URL titles are disabled for the
        channel, the request fails, the page is a Cloudflare challenge, or
        the resource is not HTML and ``'filesize'`` is disabled.
    """
    disabled_commands = database.get(db, 'channels', 'disabled', 'chan', chan) or []

    # don't bother if the channel has url titles disabled
    if 'urltitles' in disabled_commands:
        return None

    # fetch, and hide all errors from the output
    try:
        req = requests.get(url, headers=headers, allow_redirects=True, stream=True, timeout=8)
    except Exception as e:
        print('[!] WARNING couldnt fetch url')
        print(e)
        return None

    # A streamed response keeps its connection until it is fully read or
    # closed, so release it on every exit path.
    try:
        domain = parsed.netloc
        content_type = req.headers.get('Content-Type', '')
        output = ['[URL]']

        if 'html' in content_type:
            try:
                title = parse_html(req)
            except Exception:
                print('[!] WARNING the url caused a parser error')
                title = 'Untitled'

            # TODO handle titles with html entities; only &quot; is decoded
            title = title.replace('&quot;', '"')

            # Cloudflare challenge pages carry no useful title
            if 'Attention Required! | Cloudflare' in title:
                return None

            output.append(title)

        else:
            if 'filesize' in disabled_commands:
                return None

            # very common mime types
            if 'image/' in content_type:
                output.append(content_type.replace('image/', '') + ' image,')
            elif 'video/' in content_type:
                output.append(content_type.replace('video/', '') + ' video,')
            elif 'text/' in content_type:
                output.append('text file,')
            elif 'application/octet-stream' == content_type:
                output.append('binary file,')
            elif 'audio/' in content_type:
                output.append('audio file,')

            # other mime types
            elif 'application/vnd.' in content_type:
                output.append('unknown binary file,')
            elif 'font/' in content_type:
                output.append('font,')

            # anything else is shown verbatim
            else:
                output.append(content_type + ' file,')

            # Content-Length arrives as a header string; a malformed value
            # raises ValueError (not TypeError), so catch both.
            try:
                size = int(req.headers.get('Content-Length', 0))
            except (TypeError, ValueError):
                size = 0

            # some pages send exactly 503 or 513 bytes of empty padding as an
            # internet explorer 5 and 6 workaround, but since that browser is
            # super dead this should probably be removed.
            # more at https://stackoverflow.com/a/11544049/4301778
            # Non-positive values carry no usable size either.
            if size <= 0 or size in (503, 513):
                output.append('unknown size')
            else:
                output.append('size: ' + formatting.filesize(size))

        # output formatting
        output = ' '.join(output)

        if len(output) > MAX_LENGTH:
            output = output[:MAX_LENGTH] + '...'

        # add domain to the end
        output = "{} ({})".format(output, domain)

        # show error codes if they appear
        if 400 <= req.status_code < 600:
            output = '{} (error {})'.format(output, req.status_code)

        return output
    finally:
        req.close()
예제 #5
0
파일: system.py 프로젝트: lity99/uguubot
def convert_kilobytes(kilobytes):
    """Render a kilobyte count as a size string.

    The value is scaled to bytes (x1024), formatted by
    ``formatting.filesize``, and every ``'iB'`` in the result is replaced
    with ``'B'``.
    """
    byte_count = kilobytes * 1024
    label = formatting.filesize(byte_count)
    return label.replace('iB', 'B')