import os
import re

from glob import glob
from json import loads

# BeautifulSoup 3 is assumed here; with bs4, "from bs4 import
# BeautifulSoup" also works (findAll is kept as a backward-compatible
# alias).
from BeautifulSoup import BeautifulSoup

# makedir, safe_write, download_url, and iterview, along with the
# SEARCH_URL, SEARCH_RESULTS_DIR, LISTING_URLS_FILE, LISTING_PAGES_DIR,
# and CSV_FILE constants, are defined elsewhere in this module.


def get_listing_urls(br):
    """
    Searches StreetEasy for all rental apartment listings in
    Williamsburg, caches each page of search results to the directory
    whose name is stored in the variable SEARCH_RESULTS_DIR, and caches
    the URLs for the listings (one per line) to the file whose name is
    stored in the variable LISTING_URLS_FILE.

    Arguments:

    br -- Browser object
    """

    if os.path.exists(LISTING_URLS_FILE):
        return

    makedir(os.path.dirname(LISTING_URLS_FILE))

    # Submit the search form, restricting results to Williamsburg
    # (StreetEasy area code 302).
    br.open(SEARCH_URL)
    br.select_form(nr=1)
    br.form['area[]'] = ['302']

    response = br.submit()
    results_url = response.geturl()

    with safe_write(LISTING_URLS_FILE) as f:
        while True:

            # Cache this page of search results, then pull each
            # listing's URL out of its details_title block.
            filename = download_url(br, results_url, SEARCH_RESULTS_DIR)
            soup = BeautifulSoup(open(filename).read())

            urls = ['http://streeteasy.com' + div.find('h5').find('a').get('href')
                    for div in soup.findAll('div',
                                            attrs={'class': 'details_title'})]

            f.write('\n'.join(urls))
            f.write('\n')
            f.flush()

            # Follow the "next page" link; on the last page there is no
            # such link, so nav is None and nav.get raises
            # AttributeError.
            nav = soup.find('a', attrs={'class': 'next_page'})
            try:
                results_url = 'http://www.streeteasy.com' + nav.get('href')
            except AttributeError:
                break
def get_listing_data():
    """
    Extracts the price and number of bedrooms from each cached listing
    page in the directory whose name is stored in the variable
    LISTING_PAGES_DIR and writes them, tab-separated and one listing
    per line, to the file whose name is stored in the variable
    CSV_FILE.
    """

    with safe_write(CSV_FILE) as f:
        for filename in iterview(glob(LISTING_PAGES_DIR + '/*')):

            contents = open(filename).read()

            # Each listing page embeds its metadata as a JSON object in
            # a JavaScript "dataLayer" array. The [obj] unpacking
            # raises ValueError unless exactly one match is found, and
            # loads() raises ValueError on malformed JSON.
            try:
                [obj] = re.findall(r'dataLayer\s*=\s*\[(.*)\];', contents)
                obj = loads(obj)
            except ValueError:
                continue  # skip pages without a parseable dataLayer

            if 'listPrice' in obj and 'listBed' in obj:
                text = '\t'.join((os.path.basename(filename),
                                  str(obj['listPrice']),
                                  str(obj['listBed'])))
                f.write(text)
                f.write('\n')
                f.flush()
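
# A minimal end-to-end driver, sketched under assumptions and not part
# of the original script: it presumes the mechanize package supplies
# the Browser object and that download_url caches a page into the given
# directory (as it is used above). The loop that fills
# LISTING_PAGES_DIR between the two functions is hypothetical.

import mechanize


def main():

    br = mechanize.Browser()
    br.set_handle_robots(False)  # don't let robots.txt abort the crawl

    # Step 1: collect listing URLs from the search results.
    get_listing_urls(br)

    # Step 2 (hypothetical): cache each listing page locally.
    for url in open(LISTING_URLS_FILE):
        download_url(br, url.strip(), LISTING_PAGES_DIR)

    # Step 3: parse prices and bedroom counts into CSV_FILE.
    get_listing_data()


if __name__ == '__main__':
    main()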