Example #1
0
def import_url (url, rd, progress=None, add_webpage_source=True, threaded=False,
                interactive=True):
    """Import information from URL.

    HTML content is handed to a WebPageImporter; zip archives are expanded
    to a file list via zip_importer; any other content type is downloaded
    to a uniquely-named temporary file.

    :param url: the URL to fetch.
    :param rd: recipe database, passed through to WebPageImporter.
    :param progress: optional callable(fraction, message) for progress updates.
    :param add_webpage_source: passed through to WebPageImporter.
    :param threaded: passed through to WebPageImporter.
    :param interactive: passed through to WebPageImporter.
    :return: a WebPageImporter instance for HTML, or a list of local
        filenames for zip/other content.

    Everything else, we hand back to our caller as a list of
    files. This is a little stupid -- it would be more elegant to just
    hand back a class, but our importer stuff is a little munged up
    with gui-ness and it's just going to have to be ugly for now
    """
    if progress: progress(0.01, 'Fetching webpage')
    sock = urllib.request.urlopen(url)
    header = sock.headers.get('content-type', 'text/html')
    # Content-Type may carry parameters ("application/zip; charset=...");
    # strip them so the media-type comparisons below are reliable.
    media_type = header.split(';')[0].strip().lower()
    if progress: progress(0.02, 'Reading headers')
    if media_type.find('html') >= 0:
        return WebPageImporter(rd,
                               url,
                               prog=progress,
                               add_webpage_source=add_webpage_source,
                               threaded=threaded,
                               interactive=interactive)
    elif media_type == 'application/zip':
        import zip_importer
        return zip_importer.zipfile_to_filelist(
            sock, progress, os.path.splitext(url.split('/')[-1])[0])
    else:
        # tempfile.tempdir is None until the module has resolved a temp
        # directory; gettempdir() resolves (and caches) it safely.
        fn = os.path.join(tempfile.gettempdir(), url.split('/')[-1])
        while os.path.exists(fn):
            fn = add_to_fn(fn)
        # 'with' guarantees the file is closed even if get_url raises.
        with open(fn, 'w') as ofi:
            ofi.write(get_url(sock, progress))
        return [fn]
Example #2
0
def import_url (url, rd, progress=None, add_webpage_source=True, threaded=False,
                interactive=True):
    """Import information from URL.

    HTML content is handed to a WebPageImporter; zip archives are expanded
    to a file list via zip_importer; any other content type is downloaded
    to a uniquely-named temporary file.

    :param url: the URL to fetch.
    :param rd: recipe database, passed through to WebPageImporter.
    :param progress: optional callable(fraction, message) for progress updates.
    :param add_webpage_source: passed through to WebPageImporter.
    :param threaded: passed through to WebPageImporter.
    :param interactive: passed through to WebPageImporter.
    :return: a WebPageImporter instance for HTML, or a list of local
        filenames for zip/other content.

    Everything else, we hand back to our caller as a list of
    files. This is a little stupid -- it would be more elegant to just
    hand back a class, but our importer stuff is a little munged up
    with gui-ness and it's just going to have to be ugly for now
    """
    if progress: progress(0.01, 'Fetching webpage')
    # NOTE(review): urllib.urlopen is the Python 2 API; on Python 3 this
    # would need urllib.request.urlopen -- confirm the target interpreter.
    sock = urllib.urlopen(url)
    header = sock.headers.get('content-type', 'text/html')
    # Content-Type may carry parameters ("application/zip; charset=...");
    # strip them so the media-type comparisons below are reliable.
    media_type = header.split(';')[0].strip().lower()
    if progress: progress(0.02, 'Reading headers')
    if media_type.find('html') >= 0:
        return WebPageImporter(rd,
                               url,
                               prog=progress,
                               add_webpage_source=add_webpage_source,
                               threaded=threaded,
                               interactive=interactive)
    elif media_type == 'application/zip':
        import zip_importer
        return zip_importer.zipfile_to_filelist(
            sock, progress, os.path.splitext(url.split('/')[-1])[0])
    else:
        # tempfile.tempdir is None until the module has resolved a temp
        # directory; gettempdir() resolves (and caches) it safely.
        fn = os.path.join(tempfile.gettempdir(), url.split('/')[-1])
        while os.path.exists(fn):
            fn = add_to_fn(fn)
        # 'with' guarantees the file is closed even if get_url raises.
        with open(fn, 'w') as ofi:
            ofi.write(get_url(sock, progress))
        return [fn]