Example #1
File: turk.py Project: omert/simexp
def shortcut_to_server(contents):
    url = "http://65.215.1.20/faces/add_shortcut.py?"
    url += urllib.urlencode([("contents",contents)])
    f=tools.my_url_open(url)
    r = f.read().strip()
    assert server_to_shortcut(r).strip().splitlines()==contents.strip().splitlines()
    return r
Example #2
def shortcut_to_server(contents):
    url = "http://65.215.1.20/faces/add_shortcut.py?"
    url += urllib.urlencode([("contents", contents)])
    f = tools.my_url_open(url)
    r = f.read().strip()
    assert server_to_shortcut(
        r).strip().splitlines() == contents.strip().splitlines()
    return r
Example #3
def get_flower_urls():

    urlstring = "http://flower-dictionary.com/list.php?page="
    lst = []
    for i in range(19):
        html = tools.my_url_open(urlstring + str(i)).read()
        lst += re.findall('href=\\".*?.htm\\"', html)
    return lst
Example #4
def get_flower_urls():

    urlstring = "http://flower-dictionary.com/list.php?page="
    lst = []
    for i in range(19):
        html = tools.my_url_open(urlstring + str(i)).read()
        lst += re.findall('href=\\".*?.htm\\"', html)
    return lst
Example #5
import tools, time
from PIL import Image
import re
from urllib import urlretrieve

urlstring = "http://www.lifeprint.com/asl101/gifs-animated/"
html = tools.my_url_open(urlstring).read()
lst = re.findall('href.*</a>', html)
lst2 = [i[6:i.rindex('"')] for i in lst]
lst3 = [i[i.rindex('"') + 2:-4] for i in lst]
#for i in range(len(lst2)):
#    urlretrieve("http://www.lifeprint.com/asl101/gifs-animated/"+lst2[i],"c:/data/signs/"+lst3[i]+".gif")
Example #6
def server_to_shortcut(label="test"):
    url = "http://65.215.1.20/faces/add_shortcut.py?" + urllib.urlencode(
        [("name", label)])
    f = tools.my_url_open(url)
    r = f.read().strip()
    return r
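Taken together, shortcut_to_server (Examples 1-2) and server_to_shortcut (Examples 6 and 12) form a store/retrieve pair against the same add_shortcut.py endpoint: the first uploads a block of text and returns a short label, the second resolves a label back to the stored text. A minimal usage sketch, assuming both functions are in scope (the variable names below are illustrative, not from the project):

contents = "line one\nline two"
label = shortcut_to_server(contents)      # upload the text, get a short label back
restored = server_to_shortcut(label)      # resolve the label to the stored text
assert restored.splitlines() == contents.strip().splitlines()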
Example #7
urlstring = "http://www.amazon.com/gp/search/ref=sr_hi_4?rh=n%3A228013%2Cn%3A%21468240%2Cn%3A551240%2Cn%3A13397641%2Cn%3A324030011&bbn=324030011&ie=UTF8&qid=1286030645#/ref=sr_pg_2?rh=n%3A228013%2Cn%3A%21468240%2Cn%3A551240%2Cn%3A13397641%2Cn%3A324030011%2Cp_6%3AA2PHD1PQGLN1MQ&page=pG&bbn=324030011&sort=pmrank&ie=UTF8&qid=1286031509"


s=""
for i in range(163):
    s+=str(i)+"_"
    
    
print s
exit(0)

imgs2 = []
for p in range(2,4):
    url = urlstring.replace("pG",str(p))
    print url
    html = tools.my_url_open(url).read()
    #print html
    imgs = html.split("AA160_.jpg")
    
    for im in imgs[:-1]:
        if im.rfind("http:")!=-1:
            imgs2.append(im[im.rfind("http:"):]+"AA160_.jpg")
    
    st = "<html>"
    for i in imgs2:
        st+="<img src='"+i+"'>\n "
    tools.my_write('c:/temp/tiles/massive2.html',st)
    
    print "Page ",p,"got ",len(imgs2),"images"
    time.sleep(2)
Example #8
import tools, time
from PIL import Image

urlstring = "http://www.amazon.com/s/qid=1285958507/ref=sr_pg_pG?ie=UTF8&sort=-price&keywords=ties&bbn=1036592&rh=k%3Aties%2Cn%3A1036592%2Cn%3A%211036682%2Cp_6%3AA32FQKBE4XLKI7&page=pG"

a = tools.SimpleDisplay()

imgs2 = []
for p in range(66):
    html = tools.my_url_open(urlstring.replace("pG", str(p))).read()
    imgs = html.split("190,246_.jpg")

    for im in imgs[:-1]:
        if im.rfind("http:") != -1:
            imgs2.append(im[im.rfind("http:"):] + "190,246_.jpg")

    st = "<html>"
    for i in imgs2:
        st += "<img src='" + i + "'>\n "
    tools.my_write('c:/temp/ties/massive2.html', st)

    print "Page ", p, "got ", len(imgs2), "images"
    time.sleep(2)

Example #9
import tools, time
from PIL import Image

urlstring = "http://www.amazon.com/gp/search/ref=sr_hi_4?rh=n%3A228013%2Cn%3A%21468240%2Cn%3A551240%2Cn%3A13397641%2Cn%3A324030011&bbn=324030011&ie=UTF8&qid=1286030645#/ref=sr_pg_2?rh=n%3A228013%2Cn%3A%21468240%2Cn%3A551240%2Cn%3A13397641%2Cn%3A324030011%2Cp_6%3AA2PHD1PQGLN1MQ&page=pG&bbn=324030011&sort=pmrank&ie=UTF8&qid=1286031509"

s = ""
for i in range(163):
    s += str(i) + "_"

print s
exit(0)

imgs2 = []
for p in range(2, 4):
    url = urlstring.replace("pG", str(p))
    print url
    html = tools.my_url_open(url).read()
    #print html
    imgs = html.split("AA160_.jpg")

    for im in imgs[:-1]:
        if im.rfind("http:") != -1:
            imgs2.append(im[im.rfind("http:"):] + "AA160_.jpg")

    st = "<html>"
    for i in imgs2:
        st += "<img src='" + i + "'>\n "
    tools.my_write('c:/temp/tiles/massive2.html', st)

    print "Page ", p, "got ", len(imgs2), "images"
    time.sleep(2)
Example #10
def get_gogh_urls():

    urlstring = "http://www.vangoghgallery.com/catalog/Painting/"
    html = tools.my_url_open(urlstring).read()
    lst = re.findall('catalog/Painting/.*?html', html)
    return lst
Example #11
import tools
import re


def get_gogh_urls():

    urlstring = "http://www.vangoghgallery.com/catalog/Painting/"
    html = tools.my_url_open(urlstring).read()
    lst = re.findall('catalog/Painting/.*?html', html)
    return lst


lst = tools.uniqueify(get_gogh_urls())
lst2 = [i[17:] for i in lst]
ref_urls = [
    "http://www.vangoghgallery.com/catalog/Painting/" + i for i in lst2
]
img_urls = []
for i in ref_urls:
    html = tools.my_url_open(i).read()
    s = re.findall("image/.*?/.*?.jpg", html)
    assert len(s) == 2 and s[0] == s[1]
    print i
    img_urls.append("http://www.vangoghgallery.com/catalog/" + s[0])

#lst2 = [i[6:-1] for i in lst]
#lst4 = []
#for i in lst2:
#    html = tools.my_url_open("http://flower-dictionary.com/"+i).read()
#    lst3=re.findall("/uploads/flowers/.*?.jpg",html)
#    if len(lst3)!=1:
#        print "SHOOT:",i,len(lst3)
#    lst4+=lst3
#lst2b = [i[:-4] for i in lst2]
#lst5 = [i[17:] for i in lst4]
Example #12
File: turk.py Project: omert/simexp
def server_to_shortcut(label="test"):
    url="http://65.215.1.20/faces/add_shortcut.py?"+urllib.urlencode([("name",label)])
    f=tools.my_url_open(url)
    r = f.read().strip()
    return r
Example #13
def get_gogh_urls():

    urlstring = "http://www.vangoghgallery.com/catalog/Painting/"
    html = tools.my_url_open(urlstring).read()
    lst = re.findall('catalog/Painting/.*?html', html)
    return lst
Example #14
import tools
import re
from urllib import urlretrieve

def get_gogh_urls():

    urlstring = "http://www.vangoghgallery.com/catalog/Painting/"
    html = tools.my_url_open(urlstring).read()
    lst = re.findall('catalog/Painting/.*?html', html)
    return lst
    
lst = tools.uniqueify(get_gogh_urls())
lst2 = [i[17:] for i in lst]
ref_urls = ["http://www.vangoghgallery.com/catalog/Painting/" + i for i in lst2]
img_urls = []
for i in ref_urls:
    html = tools.my_url_open(i).read()
    s = re.findall("image/.*?/.*?.jpg", html)
    assert len(s) == 2 and s[0] == s[1]
    print i
    img_urls.append("http://www.vangoghgallery.com/catalog/" + s[0])



#lst2 = [i[6:-1] for i in lst]
#lst4 = []
#for i in lst2:
#    html = tools.my_url_open("http://flower-dictionary.com/"+i).read()
#    lst3=re.findall("/uploads/flowers/.*?.jpg",html)    
#    if len(lst3)!=1:
#        print "SHOOT:",i,len(lst3)
#    lst4+=lst3
Example #15
import tools
import re
from urllib import urlretrieve

def get_flower_urls():

    urlstring = "http://flower-dictionary.com/list.php?page="
    lst = []
    for i in range(19):
        html = tools.my_url_open(urlstring + str(i)).read()
        lst += re.findall('href=\\".*?.htm\\"', html)
    return lst
    
lst = get_flower_urls()
lst2 = [i[6:-1] for i in lst]
lst4 = []
for i in lst2:
    html = tools.my_url_open("http://flower-dictionary.com/" + i).read()
    lst3 = re.findall("/uploads/flowers/.*?.jpg", html)
    if len(lst3) != 1:
        print "SHOOT:", i, len(lst3)
    lst4 += lst3
lst2b = [i[:-4] for i in lst2]
lst5 = [i[17:] for i in lst4]

n = len(lst5)

names = []
nums = []

for i in range(n):
    if lst5[i] not in lst5[:i]:
        nums.append(lst5[i])
Example #16
            # Note: find() returns its index relative to nxt[aaa:], so compute it
            # first and only then add aaa; otherwise the -1 check can never trigger.
            bbb = nxt[aaa:].find("</span>")
            if bbb == -1:
                print "YO7"
                continue
            bbb += aaa
            res['att_price'] = float(nxt[aaa:bbb].replace(",", ""))
            results.append(res)
        except:
            pass
    return results


for p in range(326, 328):
    allresults = []
    url = urlstring.replace("pG", str(p))
    #print url
    html = tools.my_url_open(url).read()
    allresults = scrape_amazon_page(html)
    print "results page", p, len(allresults), len(allresults) / 24., len(tools.uniqueify(allresults))

    i = 0
    ar2 = []
    for r in allresults:
        i = i + 1
        imfname = "c:/temp/tiles2/" + r['ASIN'] + ".jpg"
        if not os.path.exists(imfname):
            try:
                print "Scraping object", i, "page", p, (p - 1) * 24 + i
                html = tools.my_url_open(r['url']).read()
                r['att_glass'] = 1 if 'Glass Tiles</a>' in html else 0
                r['att_ceramic'] = 1 if 'Ceramic Tiles</a>' in html else 0
                r['att_stone'] = 1 if 'Stone Tiles</a>' in html else 0
Example #17
            # Note: find() returns its index relative to nxt[aaa:], so compute it
            # first and only then add aaa; otherwise the -1 check can never trigger.
            bbb = nxt[aaa:].find("</span>")
            if bbb == -1:
                print "YO7"
                continue
            bbb += aaa
            res['att_price'] = float(nxt[aaa:bbb].replace(",", ""))
            results.append(res)
        except:
            pass
    return results


for p in range(326, 328):
    allresults = []
    url = urlstring.replace("pG", str(p))
    #print url
    html = tools.my_url_open(url).read()
    allresults = scrape_amazon_page(html)
    print "results page", p, len(allresults), len(allresults) / 24., len(
        tools.uniqueify(allresults))

    i = 0
    ar2 = []
    for r in allresults:
        i = i + 1
        imfname = "c:/temp/tiles2/" + r['ASIN'] + ".jpg"
        if not os.path.exists(imfname):
            try:
                print "Scraping object", i, "page", p, (p - 1) * 24 + i
                html = tools.my_url_open(r['url']).read()
                r['att_glass'] = 1 if 'Glass Tiles</a>' in html else 0
                r['att_ceramic'] = 1 if 'Ceramic Tiles</a>' in html else 0
Example #18
import tools, time
from PIL import Image
import re
from urllib import urlretrieve

urlstring = "http://www.lifeprint.com/asl101/gifs-animated/"
html = tools.my_url_open(urlstring).read()
lst = re.findall('href.*</a>', html)
# lst2: the href target (the .gif filename); lst3: the link text, used as the local file name.
lst2 = [i[6:i.rindex('"')] for i in lst]
lst3 = [i[i.rindex('"') + 2:-4] for i in lst]
#for i in range(len(lst2)):
#    urlretrieve("http://www.lifeprint.com/asl101/gifs-animated/"+lst2[i],"c:/data/signs/"+lst3[i]+".gif")

Example #19
import tools
import re


def get_flower_urls():

    urlstring = "http://flower-dictionary.com/list.php?page="
    lst = []
    for i in range(19):
        html = tools.my_url_open(urlstring + str(i)).read()
        lst += re.findall('href=\\".*?.htm\\"', html)
    return lst


lst = get_flower_urls()
lst2 = [i[6:-1] for i in lst]
lst4 = []
for i in lst2:
    html = tools.my_url_open("http://flower-dictionary.com/" + i).read()
    lst3 = re.findall("/uploads/flowers/.*?.jpg", html)
    if len(lst3) != 1:
        print "SHOOT:", i, len(lst3)
    lst4 += lst3
lst2b = [i[:-4] for i in lst2]
lst5 = [i[17:] for i in lst4]

n = len(lst5)

names = []
nums = []

for i in range(n):
    if lst5[i] not in lst5[:i]:
        nums.append(lst5[i])
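
All of the examples above depend on a project-local tools module (my_url_open, my_write, uniqueify, SimpleDisplay) whose source is not shown here. Below is a minimal sketch of what the three helpers used most often might look like, written for Python 2 to match the examples; this is an assumption about their behaviour, not the actual omert/simexp implementation.

import time
import urllib2


def my_url_open(url, retries=3, delay=2):
    # Assumed behaviour: open a URL like urllib2.urlopen, retrying a few times on failure.
    for attempt in range(retries):
        try:
            return urllib2.urlopen(url)
        except Exception:
            if attempt == retries - 1:
                raise
            time.sleep(delay)


def my_write(fname, s):
    # Assumed behaviour: write a string to a file, overwriting any existing content.
    f = open(fname, 'w')
    f.write(s)
    f.close()


def uniqueify(seq):
    # Assumed behaviour: drop duplicates while preserving order (uses == membership,
    # so it also works on unhashable items such as the result dicts in Examples 16-17).
    out = []
    for x in seq:
        if x not in out:
            out.append(x)
    return out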