Exemplo n.º 1
0
def get_code(num):
    """Download snippet page ``num`` and return its first TEXTAREA content.

    The page URL is built from ``num`` with ``%d``, so ``num`` must be an
    integer. Progress messages are printed before and after the download.

    NOTE(review): ``upen`` and ``re`` are not defined in this snippet;
    ``upen`` is presumably ``urllib.urlopen`` imported under that alias --
    confirm against the file's imports.

    Returns:
        The text between the first ``<TEXTAREA ...>`` tag and the next
        ``</TEXTAREA>`` on the page (the match is case-sensitive).

    Raises:
        IndexError: if the page contains no TEXTAREA block.
    """
    # Local import so the block stays self-contained; closing() gives the
    # response object a deterministic close() even if read() raises.
    from contextlib import closing

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('get page...')
    url = 'http://www.c.happycodings.com/code_snippets/code%d.html' % num
    with closing(upen(url)) as page:
        text = page.read()
    print('got')
    # re.S lets '.' span newlines, since the snippet body is multi-line.
    code = re.findall('<TEXTAREA[^>]*>(.+?)</TEXTAREA>', text, re.S)
    if not code:
        # Same exception type as the former bare code[0], but with context.
        raise IndexError('no <TEXTAREA> block found at %s' % url)
    return code[0]
Exemplo n.º 2
0
def get_code(num):
    """Download snippet page ``num`` and return its first TEXTAREA content.

    The page URL is built from ``num`` with ``%d``, so ``num`` must be an
    integer. Progress messages are printed before and after the download.

    NOTE(review): ``upen`` and ``re`` are not defined in this snippet;
    ``upen`` is presumably ``urllib.urlopen`` imported under that alias --
    confirm against the file's imports.

    Returns:
        The text between the first ``<TEXTAREA ...>`` tag and the next
        ``</TEXTAREA>`` on the page (the match is case-sensitive).

    Raises:
        IndexError: if the page contains no TEXTAREA block.
    """
    # Local import so the block stays self-contained; closing() gives the
    # response object a deterministic close() even if read() raises.
    from contextlib import closing

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('get page...')
    url = 'http://www.c.happycodings.com/code_snippets/code%d.html' % num
    with closing(upen(url)) as page:
        text = page.read()
    print('got')
    # re.S lets '.' span newlines, since the snippet body is multi-line.
    code = re.findall('<TEXTAREA[^>]*>(.+?)</TEXTAREA>', text, re.S)
    if not code:
        # Same exception type as the former bare code[0], but with context.
        raise IndexError('no <TEXTAREA> block found at %s' % url)
    return code[0]
Exemplo n.º 3
0
def get_code(num):
    """Download snippet page ``num`` and return its first TEXTAREA content.

    The page URL is built from ``num`` with ``%d``, so ``num`` must be an
    integer. Progress messages are printed before and after the download.

    NOTE(review): ``upen`` and ``re`` are not defined in this snippet;
    ``upen`` is presumably ``urllib.urlopen`` imported under that alias --
    confirm against the file's imports.

    Returns:
        The text between the first ``<TEXTAREA ...>`` tag and the next
        ``</TEXTAREA>`` on the page (the match is case-sensitive).

    Raises:
        IndexError: if the page contains no TEXTAREA block.
    """
    # Local import so the block stays self-contained; closing() gives the
    # response object a deterministic close() even if read() raises.
    from contextlib import closing

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("get page...")
    url = "http://www.c.happycodings.com/code_snippets/code%d.html" % num
    with closing(upen(url)) as page:
        text = page.read()
    print("got")
    # re.S lets '.' span newlines, since the snippet body is multi-line.
    code = re.findall("<TEXTAREA[^>]*>(.+?)</TEXTAREA>", text, re.S)
    if not code:
        # Same exception type as the former bare code[0], but with context.
        raise IndexError("no <TEXTAREA> block found at %s" % url)
    return code[0]
Exemplo n.º 4
0
from urllib import urlopen as upen
import re
# Base URL of the e-text: the script below fetches this page as the chapter
# index and appends 'chapterN.html' to it for each chapter page.
base = 'http://etext.library.adelaide.edu.au/f/fitzgerald/f_scott/gatsby/'

from htmlentitydefs import entitydefs, codepoint2name
# Debug output: dump the whole entity table and the entity name registered
# for code point 8220.
print entitydefs, codepoint2name[8220]
# NOTE(review): `fail` is not defined anywhere in this snippet, so this line
# presumably raises NameError and stops the script before anything below
# runs. It looks like a deliberate debugging stop -- confirm before removing,
# because removing it enables the downloads and file writes further down.
fail


def conv(x):
    """Replace one numeric character reference with its entity text.

    Used as the replacement callback for ``re.sub('&#\\d+;', conv, ...)``.

    Args:
        x: a regex match object whose matched text has the form
            ``&#<digits>;``.

    Returns:
        ``entitydefs[name]`` when the code point has a named entity in
        ``codepoint2name``; otherwise the matched text unchanged, so one
        unnamed code point (for example ``&#33;``) no longer aborts the
        whole substitution with a KeyError.

    NOTE(review): Python 2's ``htmlentitydefs`` documents ``entitydefs``
    values as ISO Latin-1 replacement text; for entities outside Latin-1
    the value may itself be a numeric reference -- verify that the result
    is what the output files need.
    """
    matched = x.group()
    # Drop the leading '&#' and the trailing ';' to get the decimal number.
    codepoint = int(matched[2:-1])
    name = codepoint2name.get(codepoint)
    if name is None:
        # No named entity for this code point: leave the reference as is.
        return matched
    return entitydefs[name]


# Download every chapter linked from the index page and save each one as a
# plain-text file under ../prog/gatsby/.
# closing() (stdlib) guarantees the urlopen handles are closed after reading.
from contextlib import closing

with closing(upen(base)) as index_page:
    html = index_page.read()
# Chapter numbers come from the index page's 'chapterN.html' links. Raw
# strings keep the regex backslashes literal; the pattern text is unchanged.
# Renamed from `all` so the builtin all() is no longer shadowed.
chapter_ids = re.findall(r'\<a href="chapter(\d+)\.html"\>Chapter', html)
print(chapter_ids)
reg = '<div[^>]*>(.+?)</div>'
for i in chapter_ids:
    with closing(upen(base + 'chapter' + i + '.html')) as chapter_page:
        text = chapter_page.read()
    # The second <div> on the page is used as the chapter body; this raises
    # IndexError if a page has fewer than two <div> blocks.
    main = re.findall(reg, text, re.S)[1]
    # Strip tags, then protect blank-line paragraph breaks with a placeholder
    # so the next step can join wrapped lines without losing paragraphs.
    main = re.sub('<.+?>', '', main).replace('\n\n', '<br><br>')
    # Join wrapped lines with spaces; each paragraph break becomes a newline.
    main = main.replace('\n', ' ').replace('<br><br>', '\n')
    # Resolve numeric character references such as '&#8220;' via conv().
    main = re.sub(r'&#\d+;', conv, main)

    # `with` flushes and closes the output file even if write() fails.
    with open('../prog/gatsby/chapter%s.txt' % i, 'w') as out_file:
        out_file.write(main)
Exemplo n.º 5
0
Arquivo: gimg.py Projeto: jaredly/prog
#!/usr/bin/env python
import cgi
import sys
from urllib import urlopen as upen
# CGI entry point: when a 'url' field is supplied, fetch that URL and relay
# the bytes to the client labelled as image/gif; otherwise answer 'hi'.
form = cgi.FieldStorage()
if 'url' in form:  # `in` replaces the deprecated has_key()
    target = form['url'].value
    # Security: the URL comes straight from the request. Python 2's
    # urllib.urlopen also opens local files (no scheme or file:), so only
    # plain web URLs are relayed and everything else is refused.
    if target.lower().startswith(('http://', 'https://')):
        # The trailing '\n' plus print's own newline ends the header block.
        print('Content-type:image/gif\n')
        response = upen(target)
        try:
            sys.stdout.write(response.read())
        finally:
            # Release the connection even if reading or writing fails.
            response.close()
    else:
        print('Status: 400 Bad Request')
        print('Content-type:text/plain\n')
        print('unsupported url')
else:
    print('Content-type:text/html\n')
    print('hi')
Exemplo n.º 6
0
#!/usr/bin/env python
import cgi
import sys
from urllib import urlopen as upen

# CGI entry point: when a 'url' field is supplied, fetch that URL and relay
# the bytes to the client labelled as image/gif; otherwise answer 'hi'.
form = cgi.FieldStorage()
if 'url' in form:  # `in` replaces the deprecated has_key()
    target = form['url'].value
    # Security: the URL comes straight from the request. Python 2's
    # urllib.urlopen also opens local files (no scheme or file:), so only
    # plain web URLs are relayed and everything else is refused.
    if target.lower().startswith(('http://', 'https://')):
        # The trailing '\n' plus print's own newline ends the header block.
        print('Content-type:image/gif\n')
        response = upen(target)
        try:
            sys.stdout.write(response.read())
        finally:
            # Release the connection even if reading or writing fails.
            response.close()
    else:
        print('Status: 400 Bad Request')
        print('Content-type:text/plain\n')
        print('unsupported url')
else:
    print('Content-type:text/html\n')
    print('hi')
Exemplo n.º 7
0
from urllib import urlopen as upen
import re
# Base URL of the e-text: the script below fetches this page as the chapter
# index and appends 'chapterN.html' to it for each chapter page.
base = 'http://etext.library.adelaide.edu.au/f/fitzgerald/f_scott/gatsby/'

from htmlentitydefs import entitydefs,codepoint2name
# Debug output: dump the whole entity table and the entity name registered
# for code point 8220.
print entitydefs,codepoint2name[8220]
# NOTE(review): `fail` is not defined anywhere in this snippet, so this line
# presumably raises NameError and stops the script before anything below
# runs. It looks like a deliberate debugging stop -- confirm before removing,
# because removing it enables the downloads and file writes further down.
fail
def conv(x):
    """Replace one numeric character reference with its entity text.

    Used as the replacement callback for ``re.sub('&#\\d+;', conv, ...)``.

    Args:
        x: a regex match object whose matched text has the form
            ``&#<digits>;``.

    Returns:
        ``entitydefs[name]`` when the code point has a named entity in
        ``codepoint2name``; otherwise the matched text unchanged, so one
        unnamed code point (for example ``&#33;``) no longer aborts the
        whole substitution with a KeyError.

    NOTE(review): Python 2's ``htmlentitydefs`` documents ``entitydefs``
    values as ISO Latin-1 replacement text; for entities outside Latin-1
    the value may itself be a numeric reference -- verify that the result
    is what the output files need.
    """
    matched = x.group()
    # Drop the leading '&#' and the trailing ';' to get the decimal number.
    codepoint = int(matched[2:-1])
    name = codepoint2name.get(codepoint)
    if name is None:
        # No named entity for this code point: leave the reference as is.
        return matched
    return entitydefs[name]

# Download every chapter linked from the index page and save each one as a
# plain-text file under ../prog/gatsby/.
# closing() (stdlib) guarantees the urlopen handles are closed after reading.
from contextlib import closing

with closing(upen(base)) as index_page:
    html = index_page.read()
# Chapter numbers come from the index page's 'chapterN.html' links. Raw
# strings keep the regex backslashes literal; the pattern text is unchanged.
# Renamed from `all` so the builtin all() is no longer shadowed.
chapter_ids = re.findall(r'\<a href="chapter(\d+)\.html"\>Chapter', html)
print(chapter_ids)
reg = '<div[^>]*>(.+?)</div>'
for i in chapter_ids:
    with closing(upen(base + 'chapter' + i + '.html')) as chapter_page:
        text = chapter_page.read()
    # The second <div> on the page is used as the chapter body; this raises
    # IndexError if a page has fewer than two <div> blocks.
    main = re.findall(reg, text, re.S)[1]
    # Strip tags, then protect blank-line paragraph breaks with a placeholder
    # so the next step can join wrapped lines without losing paragraphs.
    main = re.sub('<.+?>', '', main).replace('\n\n', '<br><br>')
    # Join wrapped lines with spaces; each paragraph break becomes a newline.
    main = main.replace('\n', ' ').replace('<br><br>', '\n')
    # Resolve numeric character references such as '&#8220;' via conv().
    main = re.sub(r'&#\d+;', conv, main)

    # `with` flushes and closes the output file even if write() fails.
    with open('../prog/gatsby/chapter%s.txt' % i, 'w') as out_file:
        out_file.write(main)