Example #1
def getFiveDigitZCTAs(depends_on="http://www.census.gov/geo/www/cob/z52000.html", creates=root + "FiveDigitZCTAs/"):
    MakeDir(creates)
    # Fetch the Census index page that lists the five-digit ZCTA shapefile archives.
    wget(depends_on, creates + "__Index.html")
    Soup = BS.BeautifulSoup(open(creates + "__Index.html"))
    # Collect (name, URL) pairs from the links to the "_shp" zip archives.
    A = [
        (Contents(x.findParent()).split(" - ")[0].strip(), str(dict(x.attrs)["href"]))
        for x in Soup.findAll("a")
        if "_shp" in str(x)
    ]
    # Download each archive and unpack it in place.
    for (name, url) in A:
        print "Downloading", name
        wget("http://www.census.gov" + url, creates + url.split("/")[-1])
        os.system("cd " + creates + " ; unzip " + url.split("/")[-1])
Example #2
File: bls.py  Project: govdata/govdata-core
def WgetMultiple(link, fname, maxtries=10):
    # Accept either a plain URL string or a record with a 'URL' field.
    link = link if is_string_like(link) else link['URL']
    opstring = '--user-agent="Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7"'
    time.sleep(5)
    for i in range(maxtries):
        wget(link, fname, opstring)
        F = open(fname, 'r').read().strip()
        # A response that starts with an HTML doctype counts as a successful download.
        if F.startswith('<!DOCTYPE HTML'):
            return
        else:
            print 'download of ' + link + ' failed: ' + F[:20]
            time.sleep(15)

    print 'download of ' + link + ' failed after ' + str(maxtries) + ' attempts'
    return
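WgetMultiple accepts either a plain URL string or a record-like object with a 'URL' field (that is what the `is_string_like` branch handles), and it treats a response starting with an HTML doctype as a successful download. Hypothetical calls of both forms (the 'ap' series code and the /tmp paths are made up for illustration):

# Plain URL string:
WgetMultiple('ftp://ftp.bls.gov/pub/time.series/ap/', '/tmp/ap_index.html')

# Record with a 'URL' field (e.g. a dict or a tabarray row):
WgetMultiple({'URL': 'ftp://ftp.bls.gov/pub/time.series/ap/'},
             '/tmp/ap_index.html', maxtries=3)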
Example #3
def bls_downloader(download_dir, code):

    MakeDirs(download_dir)
    download_dir += ('/' if download_dir[-1] != '/' else '')

    MakeDir(download_dir + 'RawDownloads/')

    # Fetch the FTP directory index for the requested BLS time-series code.
    get = "ftp://ftp.bls.gov/pub/time.series/" + code + '/'
    WgetMultiple(get, download_dir + 'RawDownloads/index.html')

    # Extract (file name, URL) pairs from the index, skipping "Current" files and,
    # when a consolidated "AllData" file exists, dropping the per-series data files.
    Soup = BeautifulSoup(open(download_dir + 'RawDownloads/index.html'))
    A = Soup.findAll('a')
    Records = [(Contents(a), str(dict(a.attrs)['href'])) for a in A]
    Records = [r for r in Records if 'Current' not in r[0].split('.')]
    RecordsR = [r for r in Records if 'AllData' in r[0]]
    if RecordsR:
        Records = RecordsR + [r for r in Records if '.data.' not in r[0]]

    # Download every remaining file.
    T = tb.tabarray(records=Records, names=['File', 'URL'])
    for (f, u) in T:
        wget(u, download_dir + 'RawDownloads/' + f + '.txt')

    makemetadata(code, download_dir + 'RawDownloads/', download_dir + 'metadata.pickle', download_dir + 'filenames.tsv')

    MakeDir(download_dir + '__FILES__')
    processtextfile(download_dir + 'RawDownloads/', download_dir + '__FILES__/documentation.txt')

    # Move the raw data files aside for parsing.
    MakeDir(download_dir + '__PARSE__')
    for l in listdir(download_dir + 'RawDownloads/'):
        if '.data.' in l:
            Rename(download_dir + 'RawDownloads/' + l, download_dir + '__PARSE__/' + l)

    # Exactly one ".series" file is expected; parse it, then remove the original.
    SPs = [download_dir + 'RawDownloads/' + l for l in listdir(download_dir + 'RawDownloads/') if l.endswith('.series.txt')]
    assert len(SPs) == 1, 'Wrong number of series paths.'
    serpath = SPs[0]
    parse_series(download_dir + 'RawDownloads/', download_dir + 'series.txt')
    delete(serpath)
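Contents (used here and in Example #1) is another project helper that is not shown; from its usage it evidently extracts the visible text of a BeautifulSoup tag. A minimal stand-in under that assumption:

def Contents(tag):
    # Hypothetical stand-in for the project's Contents helper: join all text
    # nodes inside a BeautifulSoup tag and strip surrounding whitespace.
    return ''.join(tag.findAll(text=True)).strip()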
Example #4
def getStates(depends_on="http://www.census.gov/geo/cob/bdy/st/st00shp/st99_d00_shp.zip", creates=root + "States/"):
    MakeDir(creates)
    wget(depends_on, creates + "st99_d00_shp.zip")
    os.system("cd " + creates + " ; unzip st99_d00_shp.zip")
Example #5
def getMSA(depends_on="http://www.census.gov/geo/www/cob/mmsa2003.html", creates=root + "MSA/"):
    MakeDir(creates)
    for x in ["cs99_03c_shp.zip", "cb99_03c_shp.zip", "md99_03c_shp.zip"]:
        wget("http://www.census.gov/geo/cob/bdy/metroarea/2003/shp/" + x, creates + x)
        os.system("cd " + creates + " ; unzip " + x)
Example #6
def getCounties(depends_on="http://www.census.gov/geo/cob/bdy/co/co00shp/co99_d00_shp.zip", creates=root + "Counties/"):
    MakeDir(creates)
    wget(depends_on, creates + "co99_d00_shp.zip")
    os.system("cd " + creates + " ; unzip co99_d00_shp.zip")
Example #7
def getCensusRegions(
    depends_on="http://www.census.gov/geo/cob/bdy/rg/rg99_d00_shp.zip", creates=root + "CensusRegions/"
):
    MakeDir(creates)
    wget(depends_on, creates + "rg99_d00_shp.zip")
    os.system("cd " + creates + " ; unzip rg99_d00_shp.zip")
Example #8
def modwget(dd, path):
    # Pass the record's option string through to wget when one is present.
    if 'opstring' in dd.dtype.names:
        wget(dd['URL'], path, opstring=dd['opstring'])
    else:
        wget(dd['URL'], path)
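modwget expects `dd` to be a row of a structured (record) array, since it checks `dd.dtype.names` for an optional 'opstring' column. A hypothetical numpy example (the URL and option string are placeholders):

import numpy as np

# Hypothetical records: a URL column plus an optional per-row option string.
recs = np.array(
    [('http://www.census.gov/geo/www/cob/z52000.html', '--user-agent="Mozilla/5.0"')],
    dtype=[('URL', 'U200'), ('opstring', 'U200')],
)

modwget(recs[0], '/tmp/z52000.html')  # 'opstring' is in dtype.names, so it is passed through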