def augment_trip_plan(raw_trip_plan):
    location_latlng = raw_trip_plan.location_latlng.to_json_obj() if raw_trip_plan.location_latlng else None
    # Augment all entities in parallel; each augment_entity call is retried
    # up to 3 times before giving up and returning None for that entity.
    entities = utils.parallelize(
        utils.retryable(augment_entity, retries=3),
        [(e, location_latlng) for e in raw_trip_plan.entities])
    trip_plan = raw_trip_plan.copy()
    for i, entity in enumerate(entities):
        # If there's an RPC error, some of these may come back as None.
        # So as a fallback make sure we at least save the incoming entity.
        # TODO: Return an error message here so the user can be notified
        # that not all entities were saved.
        if not entity:
            entities[i] = raw_trip_plan.entities[i]
    trip_plan.entities = entities
    return trip_plan
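
Both augment_trip_plan and build_scrapers below depend on utils.retryable and utils.parallelize, whose implementations are not shown here. The following is a minimal sketch of what they are assumed to do, inferred only from the call sites (retryable swallows exceptions and yields None after exhausting its retries; parallelize fans the calls out over threads and preserves input order). The real helpers may differ.

import threading

def retryable(fn, retries=3):
    # Assumed behavior: call fn, retrying on any exception, and return None
    # once all attempts fail (callers above check for None results).
    def wrapper(*args, **kwargs):
        for _ in range(retries):
            try:
                return fn(*args, **kwargs)
            except Exception:
                pass
        return None
    return wrapper

def parallelize(fn, args_tuples):
    # Assumed behavior: invoke fn once per argument tuple, each on its own
    # thread, and return the results in the same order as the inputs.
    results = [None] * len(args_tuples)
    def run(i, args):
        results[i] = fn(*args)
    threads = [threading.Thread(target=run, args=(i, args))
               for i, args in enumerate(args_tuples)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return results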
Example #2
def build_scrapers(url, client_page_source=None, force_fetch_page=False, allow_expansion=True, for_guide=False):
    # Prefer the page source supplied by the client; fall back to fetching it
    # server-side when the site requires it or a fetch is explicitly forced.
    page_source_tree = html_parsing.parse_tree_from_string(client_page_source) if client_page_source else None
    if not page_source_tree and (url_requires_server_page_source(url) or force_fetch_page):
        page_source_tree = html_parsing.parse_tree(url)

    scraped_pages = []
    for scraper_class in ALL_SCRAPERS:
        handleable_urls = scraper_class.handleable_urls(url, page_source_tree, allow_expansion)
        if handleable_urls:
            # Fetch all candidate urls in parallel, retrying each up to 3 times.
            reqs = [html_parsing.make_request(u) for u in handleable_urls]
            resps = utils.parallelize(utils.retryable(urllib2.urlopen, 3), [(req,) for req in reqs])
            for handleable_url, resp in zip(handleable_urls, resps):
                if not resp:
                    # The fetch failed even after retries; skip this url.
                    print "Failed to fetch url: %s" % handleable_url
                    continue
                tree = etree.parse(resp, html_parsing.htmlparser())
                scraper = scraper_class(handleable_url, tree, for_guide)
                scraped_pages.append(scraper)
            # Only the first scraper class that can handle the url is used.
            break
    return scraped_pages
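
build_scrapers relies on only two things from each entry in ALL_SCRAPERS: a handleable_urls hook callable on the class itself, taking (url, page_source_tree, allow_expansion), and a constructor taking (url, tree, for_guide). A rough sketch of a class satisfying that contract, with the class name and the url check invented purely for illustration:

class ExampleScraper(object):
    # Hypothetical scraper showing the interface build_scrapers relies on.
    # A real scraper in ALL_SCRAPERS would extract data from the parsed tree.

    @classmethod
    def handleable_urls(cls, url, page_source_tree, allow_expansion):
        # Return the urls this scraper can process; an empty result makes
        # build_scrapers move on to the next scraper class.
        if 'example.com' in url:
            return [url]
        return []

    def __init__(self, url, tree, for_guide):
        self.url = url
        self.tree = tree
        self.for_guide = for_guide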