Exemplo n.º 1
0
    return found


# Result accumulator; not referenced in the visible part of the loop below.
found_books = set()

# Most recently printed title, used to print each run of equal titles once.
prev = ''
# Walk every '.arc' file found directly inside arc_dir.
for filename in (i for i in os.listdir(arc_dir) if i.endswith('.arc')):
    # Only archives whose file name starts with '20100412' are processed.
    if not filename.startswith('20100412'):
        continue
    # read_arc is iterated as (url, wire) pairs for the records of one archive.
    for url, wire in read_arc(arc_dir + '/' + filename):
        #print filename, url
        # Keep only records whose URL is an Amazon search request
        # ('http://www.amazon.com/s?...'); 'file...' URLs are dropped first.
        if url.startswith('file'):
            continue
        if not url.startswith('http://www.amazon.com/s?'):
            continue
        body = read_body(wire)
        # NOTE(review): assuming re_title is a compiled regex, search() returns
        # None when nothing matches, and m.group(1) below would then raise
        # AttributeError -- there is no guard for that case.
        m = re_title.search(body)
        # Print the captured title only when it differs from the previous one.
        if m.group(1) != prev:
            print(m.group(1))
            prev = m.group(1)
        # NOTE(review): this unconditional continue makes everything below it
        # in the loop body unreachable -- looks like a debugging short-circuit;
        # confirm whether it is intentional.
        continue
        doc = fromstring(body)
        # Skip the record when an element with id 'noResultsTitle' exists;
        # the KeyError branch is the "element not found" case and falls through.
        try:
            doc.get_element_by_id('noResultsTitle')
            continue
        except KeyError:
            pass
        # Look for elements with class 'resultCount' and match the text of the
        # first one against re_result_count.
        rc = doc.find_class('resultCount')
        if rc:
            m = re_result_count.match(rc[0].text)
            if m:
Exemplo n.º 2
0
        found.append(m.group(1))
    return found

# Result accumulator; not referenced in the visible part of the loop below.
found_books = set()

# Most recently printed title, used to print each run of equal titles once.
prev = ''
# Walk every '.arc' file found directly inside arc_dir.
for filename in (i for i in os.listdir(arc_dir) if i.endswith('.arc')):
    # Only archives whose file name starts with '20100412' are processed.
    if not filename.startswith('20100412'):
        continue
    # read_arc is iterated as (url, wire) pairs for the records of one archive.
    for url, wire in read_arc(arc_dir +'/' + filename):
        #print filename, url
        # Keep only records whose URL is an Amazon search request
        # ('http://www.amazon.com/s?...'); 'file...' URLs are dropped first.
        if url.startswith('file'):
            continue
        if not url.startswith('http://www.amazon.com/s?'):
            continue
        body = read_body(wire)
        # NOTE(review): assuming re_title is a compiled regex, search() returns
        # None when nothing matches, and m.group(1) below would then raise
        # AttributeError -- there is no guard for that case.
        m = re_title.search(body)
        # Print the captured title only when it differs from the previous one.
        if m.group(1) != prev:
            print(m.group(1))
            prev = m.group(1)
        # NOTE(review): this unconditional continue makes everything below it
        # in the loop body unreachable -- looks like a debugging short-circuit;
        # confirm whether it is intentional.
        continue
        doc = fromstring(body)
        # Skip the record when an element with id 'noResultsTitle' exists;
        # the KeyError branch is the "element not found" case and falls through.
        try:
            doc.get_element_by_id('noResultsTitle')
            continue
        except KeyError:
            pass
        # Look for elements with class 'resultCount' and match the text of the
        # first one against re_result_count.
        rc = doc.find_class('resultCount')
        if rc:
            m = re_result_count.match(rc[0].text)
            if m: