Exemplo n.º 1
0
def fill_loop_by_room(config, survey_id):
    """
    Master routine for looping over rooms (after a search)
    to fill in the properties.

    Each pass fetches one room with db_get_room_to_fill() and asks the
    listing to retrieve its details. The loop ends when no room is returned
    or when config.FILL_MAX_ROOM_COUNT rooms have been attempted.

    If config.HTTP_PROXY_LIST is empty, the routine sleeps for
    config.RE_INIT_SLEEP_TIME seconds and builds a fresh ABConfig() before
    carrying on.

    Returns:
        None in every case.

    Raises:
        AttributeError: if it occurs before a room has been fetched in the
            current pass (there is no room that could be marked as deleted).
        Exception: any other exception is logged and re-raised.
    """
    room_count = 0
    while room_count < config.FILL_MAX_ROOM_COUNT:
        # Reset on every pass so that the AttributeError handler below can
        # never act on a listing left over from an earlier iteration.
        listing = None
        try:
            if not config.HTTP_PROXY_LIST:
                logging.info(
                    "No proxies left: re-initialize after %s seconds",
                    config.RE_INIT_SLEEP_TIME)
                time.sleep(config.RE_INIT_SLEEP_TIME)  # be nice
                config = ABConfig()
            room_count += 1
            listing = db_get_room_to_fill(config, survey_id)
            if listing is None:
                return None
            if not listing.ws_get_room_info(config.FLAGS_ADD):
                # Airbnb now seems to return nothing if a room has gone
                listing.save_as_deleted()
        except AttributeError as e:
            if listing is None:
                # The error is unrelated to any room (for example a missing
                # config attribute): nothing to mark as deleted, so report
                # it like any other failure instead of masking it.
                logging.error("Error in fill_loop_by_room: %s", str(type(e)))
                raise
            logging.error("Attribute error: marking room as deleted.")
            listing.save_as_deleted()
        except Exception as e:
            logging.error("Error in fill_loop_by_room: %s", str(type(e)))
            raise
def main():
    """
    Command-line entry point: export the surveys of one city.

    Builds an ABConfig, parses the command line and, when a city is given,
    calls export_city_summary() (with --summary) or export_city_data()
    (otherwise). The project name is lower-cased before it is passed on.
    Without a city the usage help is printed.
    """
    ab_config = ABConfig()
    parser = argparse.ArgumentParser(
        description="Create a spreadsheet of surveys from a city")
    parser.add_argument(
        '-c', '--city',
        metavar='city', action='store',
        help="set the city")
    parser.add_argument(
        '-p', '--project',
        metavar='project', action='store', default="public",
        help="""the project determines the table or view: public
                        for room, gis for listing_city, default public""")
    parser.add_argument(
        '-f', '--format',
        metavar='format', action='store', default="xlsx",
        help="output format (xlsx or csv), default xlsx")
    parser.add_argument(
        '-s', '--summary',
        action='store_true', default=False,
        help="create a summary spreadsheet instead of raw data")
    options = parser.parse_args()

    # Nothing to export without a city: show the usage text and stop.
    if not options.city:
        parser.print_help()
        return

    project = options.project.lower()
    if options.summary:
        export_city_summary(ab_config, options.city, project)
    else:
        export_city_data(ab_config, options.city, project, options.format)
Exemplo n.º 3
0
def main():
    """
    Command-line entry point: run the single action selected by the options.

    The options are tested in a fixed order and only the first one that is
    set is executed; if none is set the usage help is printed. The --fill
    option is tested with "is not None", so a value of 0 still selects it.

    Exit status:
        * SystemExit raised by an action propagates with its own exit code.
        * KeyboardInterrupt ends the program through sys.exit().
        * Any other exception is logged with its traceback and the program
          exits with status 1, so that callers can detect the failure.
    """
    (parser, args) = parse_args()
    logging.basicConfig(format='%(levelname)-8s%(message)s')
    ab_config = ABConfig(args)
    try:
        if args.search:
            survey = ABSurveyByNeighborhood(ab_config, args.search)
            survey.search(ab_config.FLAGS_ADD)
        elif args.search_by_neighborhood:
            survey = ABSurveyByNeighborhood(ab_config, args.search_by_neighborhood)
            survey.search(ab_config.FLAGS_ADD)
        elif args.search_by_zipcode:
            survey = ABSurveyByZipcode(ab_config, args.search_by_zipcode)
            survey.search(ab_config.FLAGS_ADD)
        elif args.search_by_bounding_box:
            survey = ABSurveyByBoundingBox(ab_config, args.search_by_bounding_box)
            survey.search(ab_config.FLAGS_ADD)
        elif args.fill is not None:
            fill_loop_by_room(ab_config, args.fill)
        elif args.addsearcharea:
            ws_get_city_info(ab_config, args.addsearcharea, ab_config.FLAGS_ADD)
        elif args.addsurvey:
            db_add_survey(ab_config, args.addsurvey)
        elif args.dbping:
            db_ping(ab_config)
        elif args.displayhost:
            display_host(ab_config, args.displayhost)
        elif args.displayroom:
            display_room(ab_config, args.displayroom)
        elif args.listsearcharea:
            list_search_area_info(ab_config, args.listsearcharea)
        elif args.listroom:
            listing = ABListing(ab_config, args.listroom, None)
            listing.print_from_db()
        elif args.listsurveys:
            list_surveys(ab_config)
        elif args.printsearcharea:
            ws_get_city_info(ab_config, args.printsearcharea, ab_config.FLAGS_PRINT)
        elif args.printroom:
            listing = ABListing(ab_config, args.printroom, None)
            listing.ws_get_room_info(ab_config.FLAGS_PRINT)
        elif args.printsearch:
            survey = ABSurveyByNeighborhood(ab_config, args.printsearch)
            survey.search(ab_config.FLAGS_PRINT)
        elif args.printsearch_by_neighborhood:
            survey = ABSurveyByNeighborhood(ab_config, args.printsearch_by_neighborhood)
            survey.search(ab_config.FLAGS_PRINT)
        elif args.printsearch_by_bounding_box:
            survey = ABSurveyByBoundingBox(ab_config, args.printsearch_by_bounding_box)
            survey.search(ab_config.FLAGS_PRINT)
        elif args.printsearch_by_zipcode:
            survey = ABSurveyByZipcode(ab_config, args.printsearch_by_zipcode)
            survey.search(ab_config.FLAGS_PRINT)
        else:
            parser.print_help()
    except SystemExit:
        # Keep whatever exit code the action asked for.
        raise
    except KeyboardInterrupt:
        sys.exit()
    except Exception:
        logging.exception("Top level exception handler: quitting.")
        # Non-zero status: an unexpected exception is a failure.
        sys.exit(1)
def main():
    """
    Entry point: run the publishing steps one after another.

    Builds an ABConfig, collects the surveys and the city views derived from
    them, then calls write_csv_files(), zip_csv_files(), upload_files() and
    write_html_file() in that order. The directory name "s3_files" is handed
    to the CSV, zip and upload steps.
    """
    config = ABConfig()
    all_surveys = surveys(config)
    views = cities(config, all_surveys)
    logging.debug(views)

    output_dir = "s3_files"
    counts = write_csv_files(config, all_surveys, views, output_dir)
    zip_csv_files(views, output_dir)
    upload_files(views, all_surveys, output_dir)
    write_html_file(all_surveys, views, counts)
Exemplo n.º 5
0
def main():
    """
    Command-line entry point: export the surveys of one city.

    Parses the command line, builds an ABConfig from the parsed arguments
    and, when a city is given, calls export_city_summary() (with --summary)
    or export_city_data() (otherwise). The project name is lower-cased and
    the start date is passed through to both export functions. Without a
    city the usage help is printed.
    """
    parser = \
        argparse.ArgumentParser(
            description="Create a spreadsheet of surveys from a city")
    parser.add_argument("-cfg",
                        "--config_file",
                        metavar="config_file",
                        action="store",
                        default=None,
                        help="""explicitly set configuration file, instead of
                        using the default <username>.config""")
    parser.add_argument('-c',
                        '--city',
                        metavar='city',
                        action='store',
                        help="""set the city""")
    parser.add_argument(
        '-p',
        '--project',
        metavar='project',
        action='store',
        default="public",
        help="""the project determines the table or view: public
                        for room, gis for listing_city, default public""")
    parser.add_argument('-f',
                        '--format',
                        metavar='format',
                        action='store',
                        default="xlsx",
                        help="""output format (xlsx or csv), default xlsx""")
    parser.add_argument(
        '-s',
        '--summary',
        action='store_true',
        default=False,
        help="create a summary spreadsheet instead of raw data")
    # The help text used to be a copy of the --summary text; it now
    # describes this option (argparse expands %(default)s itself).
    parser.add_argument(
        '-sd',
        '--start_date',
        metavar="start_date",
        action='store',
        default=DEFAULT_START_DATE,
        help="set the start date for the export, default %(default)s")
    args = parser.parse_args()
    ab_config = ABConfig(args)

    if args.city:
        if args.summary:
            export_city_summary(ab_config, args.city, args.project.lower(),
                                args.start_date)
        else:
            export_city_data(ab_config, args.city, args.project.lower(),
                             args.format, args.start_date)
    else:
        parser.print_help()
Exemplo n.º 6
0
def main():
    """
    Controlling routine that calls the others.

    Determines a bounding box, either from a search area name (--sa, via
    BoundingBox.from_google) or from the --bb_* arguments (via
    BoundingBox.from_args, which takes precedence when --bb_n_lat is given).
    It then performs up to --count lookups (default 1000). Each lookup
    selects a location with select_lat_lng(), reverse geocodes it and
    stores the result with update_location().

    Exits with status 0 when the count is zero or negative, or when
    select_lat_lng() returns None. Exits through parser.error() when no
    bounding box was specified.
    """
    config = ABConfig()
    parser = argparse.ArgumentParser(description='reverse geocode')
    # These arguments should be more carefully constructed. Right now there is
    # no defining what is required, and what is optional, and what contradicts
    # what.
    parser.add_argument("--sa",
                        metavar="search_area",
                        type=str,
                        help="""search_area""")
    parser.add_argument("--lat", metavar="lat", type=float, help="""lat""")
    parser.add_argument("--lng", metavar="lng", type=float, help="""lng""")
    parser.add_argument("--bb_n_lat",
                        metavar="bb_n_lat",
                        type=float,
                        help="""bb_n_lat""")
    parser.add_argument("--bb_s_lat",
                        metavar="bb_s_lat",
                        type=float,
                        help="""bb_s_lat""")
    parser.add_argument("--bb_e_lng",
                        metavar="bb_e_lng",
                        type=float,
                        help="""bb_e_lng""")
    parser.add_argument("--bb_w_lng",
                        metavar="bb_w_lng",
                        type=float,
                        help="""bb_w_lng""")
    parser.add_argument("--count",
                        metavar="count",
                        type=int,
                        help="""number_of_lookups""")
    args = parser.parse_args()
    search_area = args.sa
    # Compare with None so that an explicit "--count 0" is honoured rather
    # than silently replaced by the default.
    count = 1000 if args.count is None else args.count

    bounding_box = None
    if search_area:
        bounding_box = BoundingBox.from_google(config, search_area)
        logger.info("Bounding box for %s = (%s, %s, %s, %s)", search_area,
                    bounding_box.bb_s_lat, bounding_box.bb_n_lat,
                    bounding_box.bb_w_lng, bounding_box.bb_e_lng)
    # "is not None" so that a latitude of exactly 0.0 is accepted.
    if args.bb_n_lat is not None:
        bounding_box = BoundingBox.from_args(config, args)
    if count <= 0:
        sys.exit(0)
    if bounding_box is None:
        # Without this check the loop below would fail on an unbound name.
        parser.error("no bounding box: supply --sa or the --bb_* arguments")

    # Inclusive upper bound: exactly `count` lookups, matching the
    # "<lookup> of <count>" progress messages.
    for lookup in range(1, count + 1):
        location = select_lat_lng(config, bounding_box)
        if location is None:
            logger.info("No more locations")
            sys.exit(0)
        location = reverse_geocode(config, location)
        logger.debug(
            "nbhd=%s, subloc=%s, loc=%s, l2=%s, l1=%s, country=%s.",
            location.neighborhood, location.sublocality, location.locality,
            location.level2, location.level1, location.country)
        success = update_location(config, location)
        if success:
            logger.info("Update succeeded: %s, %s: %s of %s",
                        location.lat_round, location.lng_round, lookup, count)
        else:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning("Update failed: %s, %s: %s of %s",
                           location.lat_round, location.lng_round, lookup,
                           count)
Exemplo n.º 7
0
def ws_request(config, url, params=None):
    """
    Individual web request: returns a response object

    Sleeps for a random fraction of config.REQUEST_SLEEP seconds, picks a
    random user agent from config.USER_AGENT_LIST (or a fixed default when
    the list is empty) and a random proxy from config.HTTP_PROXY_LIST (or no
    proxy when the list is empty), then issues a GET request with
    config.HTTP_TIMEOUT as the timeout.

    On a 503 response through a proxy, the proxy is removed from
    config.HTTP_PROXY_LIST with probability one half, and the response is
    still returned. On a 503 response without a proxy the program exits via
    sys.exit().

    Returns:
        The response object, or None when the request raised a requests
        exception or any other Exception (which is logged).

    Raises:
        SystemExit, KeyboardInterrupt: always propagated to the caller.
    """
    # Bound before the try block so that every except clause can read it,
    # whichever statement raised.
    http_proxy = None
    try:
        # wait
        sleep_time = config.REQUEST_SLEEP * random.random()
        logger.debug("sleeping %s seconds...", str(sleep_time)[:7])
        time.sleep(sleep_time)  # be nice

        timeout = config.HTTP_TIMEOUT

        # If a list of user agent strings is supplied, use it
        if len(config.USER_AGENT_LIST) > 0:
            user_agent = random.choice(config.USER_AGENT_LIST)
            headers = {"User-Agent": user_agent}
        else:
            headers = {'User-Agent': 'Mozilla/5.0'}

        # If there is a list of proxies supplied, use it
        logger.debug("Using %s proxies.", len(config.HTTP_PROXY_LIST))
        if len(config.HTTP_PROXY_LIST) > 0:
            http_proxy = random.choice(config.HTTP_PROXY_LIST)
            proxies = {
                'http': http_proxy,
                'https': http_proxy,
            }
            logger.debug("Requesting page through proxy %s", http_proxy)
        else:
            proxies = None

        # Now make the request
        response = requests.get(url,
                                params,
                                timeout=timeout,
                                headers=headers,
                                proxies=proxies)
        if response.status_code == 503:
            if http_proxy:
                logger.warning("503 error for proxy %s", http_proxy)
                # Drop the proxy only half of the time, at random.
                if random.choice([True, False]):
                    # Remove first so that the logged count really is the
                    # number of proxies remaining.
                    config.HTTP_PROXY_LIST.remove(http_proxy)
                    logger.warning(
                        "Removing {http_proxy} from proxy list. {n} proxies remaining"
                        .format(http_proxy=http_proxy,
                                n=len(config.HTTP_PROXY_LIST)))
                if len(config.HTTP_PROXY_LIST) == 0:
                    # fill proxy list again, wait a long time, then restart
                    logger.error("No proxies in list. Re-initializing.")
                    time.sleep(config.RE_INIT_SLEEP_TIME)
                    # NOTE(review): this rebinds only the local name; the
                    # caller's config object is not replaced - confirm that
                    # callers re-initialize their own configuration.
                    config = ABConfig()
            else:
                logger.error(
                    "Quitting on 503 error (no proxies left or not using proxies)"
                )
                sys.exit()
        return response
    except (SystemExit, KeyboardInterrupt):
        raise
    except requests.exceptions.ConnectionError:
        # For requests error and exceptions, see
        # http://docs.python-requests.org/en/latest/user/quickstart/
        # errors-and-exceptions
        logger.error("Network problem: ConnectionError")
        # React only half of the time, at random.
        if random.choice([True, False]):
            if http_proxy is None or len(config.HTTP_PROXY_LIST) < 1:
                # no proxy to drop: just wait a long time before returning
                logger.error("No proxies left in the list. Re-initializing.")
                time.sleep(config.RE_INIT_SLEEP_TIME)  # be nice
            else:
                # remove the proxy from the proxy list
                logger.warning("Removing %s from proxy list.", http_proxy)
                config.HTTP_PROXY_LIST.remove(http_proxy)
        return None
    except requests.exceptions.HTTPError:
        logger.error("Invalid HTTP response: HTTPError")
        return None
    except requests.exceptions.Timeout:
        logger.error("Request timed out: Timeout")
        return None
    except requests.exceptions.TooManyRedirects:
        logger.error("Too many redirects: TooManyRedirects")
        return None
    except requests.exceptions.RequestException:
        logger.error("Unidentified Requests error: RequestException")
        return None
    except Exception as e:
        logger.exception("Exception type: %s", type(e).__name__)
        return None