Пример #1
0
def download_latest_hlebsol():
    """Download the two hleb-sol.biz .xls templates into ``dinner/fixtures``.

    ``1.xls`` is saved as ``hlebsol-current.xls`` and ``2.xls`` as
    ``hlebsol-next.xls``. Once both downloads have been moved into place,
    each file is copied to the matching ``fusion-current.xls`` /
    ``fusion-next.xls`` name in the same directory.

    All paths are relative to the current working directory. The work is
    done by the external ``wget``, ``mv`` and ``cp`` commands via ``pbs``.
    """
    # Only the commands that are actually used are imported.
    from pbs import wget, mv, cp

    base_url = 'http://hleb-sol.biz/templates/'
    fixtures_dir = 'dinner/fixtures'
    # (file name on the server, suffix used in the fixture file names)
    templates = (('1.xls', 'current'), ('2.xls', 'next'))

    for remote_name, suffix in templates:
        # -N: timestamping, -S: print the server response headers.
        # NOTE(review): the file is moved away right after the download, so
        # -N presumably has no local copy to compare against on later runs.
        wget('-N', '-S', base_url + remote_name)
        mv(remote_name, '%s/hlebsol-%s.xls' % (fixtures_dir, suffix))

    # Copies run only after both downloads succeeded, as in the original flow.
    for _, suffix in templates:
        cp('%s/hlebsol-%s.xls' % (fixtures_dir, suffix),
           '%s/fusion-%s.xls' % (fixtures_dir, suffix))
Пример #2
0
def rip_site(domain_name, output_dir, timeout=120):
    """Download a page with wget and return the path of its main HTML file.

    Args:
        domain_name: URL or host name handed to wget as the download target.
        output_dir: Directory wget saves into (``-P``); ``-nd`` keeps all
            files directly in it, with no subdirectories.
        timeout: Time budget in seconds. wget's ``--read-timeout`` is set
            10 seconds below it, but never below 1 second.

    Returns:
        ``<output_dir>/index.html`` if it exists, otherwise the path of the
        largest non-empty ``*.html`` file in ``output_dir``.

    Raises:
        Exception: If ``output_dir`` holds no non-empty ``.html`` file.
    """
    user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.22) Gecko/20110905 Ubuntu/10.04 (lucid) Firefox/3.6.22"

    # Small timeouts would make `timeout - 10` zero or negative, which wget
    # treats as "no timeout" or rejects, so clamp to at least one second.
    # NOTE(review): the 10s margin presumably leaves headroom for a caller-side
    # deadline; confirm against the caller.
    read_timeout = max(timeout - 10, 1)

    # -E adjust extensions, -H span hosts, -k convert links, -p fetch page
    # requisites, -t 2 tries. Exit codes 3 (file I/O error) and 8 (server
    # error response) are tolerated as well as 0.
    sh.wget("-U", user_agent, "--no-check-certificate", "--quiet", "-EHkp",
        "--read-timeout", read_timeout, "-t", 2, "-nd", "-P", output_dir, domain_name, _ok_code=[3,0,8])

    # Prefer index.html when wget produced one.
    index = join(output_dir, "index.html")
    if exists(index):
        return index

    # Otherwise fall back to the biggest html file. Starting at size 0 with a
    # strict comparison means empty files are never selected.
    best_name, best_size = None, 0
    for name in os.listdir(output_dir):
        if not name.endswith(".html"):
            continue
        size = os.stat(join(output_dir, name)).st_size
        if size > best_size:
            best_name, best_size = name, size

    if best_name is None:
        # Call form of raise is valid on both Python 2 and Python 3.
        raise Exception("no html file found in download")
    return join(output_dir, best_name)